pi-crew 0.5.25 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +99 -0
- package/README.md +13 -11
- package/docs/patterns/command-agent-skill.md +71 -0
- package/package.json +1 -1
- package/skills/council/SKILL.md +163 -0
- package/src/agents/agent-config.ts +4 -1
- package/src/agents/discover-agents.ts +1 -0
- package/src/benchmark/feedback-loop.ts +4 -2
- package/src/extension/cross-extension-rpc.ts +48 -0
- package/src/extension/registration/commands.ts +2 -1
- package/src/extension/registration/subagent-tools.ts +2 -0
- package/src/extension/registration/team-tool.ts +2 -0
- package/src/extension/registration/viewers.ts +1 -0
- package/src/extension/run-export.ts +16 -1
- package/src/extension/run-import.ts +16 -0
- package/src/extension/team-tool/anchor.ts +5 -1
- package/src/extension/team-tool/api.ts +9 -4
- package/src/extension/team-tool/config-patch.ts +15 -1
- package/src/extension/team-tool.ts +2 -1
- package/src/hooks/registry.ts +9 -1
- package/src/hooks/types.ts +14 -0
- package/src/i18n.ts +15 -2
- package/src/observability/exporters/otlp-exporter.ts +73 -0
- package/src/runtime/adaptive-plan.ts +24 -0
- package/src/runtime/agent-control.ts +6 -3
- package/src/runtime/async-runner.ts +58 -3
- package/src/runtime/background-runner.ts +1 -1
- package/src/runtime/chain-parser.ts +192 -0
- package/src/runtime/chain-runner.ts +58 -0
- package/src/runtime/child-pi.ts +1 -1
- package/src/runtime/crew-agent-records.ts +4 -3
- package/src/runtime/cross-extension-rpc.ts +34 -8
- package/src/runtime/diagnostic-export.ts +3 -4
- package/src/runtime/dynamic-script-runner.ts +7 -7
- package/src/runtime/foreground-watchdog.ts +2 -2
- package/src/runtime/intercom-bridge.ts +178 -0
- package/src/runtime/live-agent-manager.ts +6 -3
- package/src/runtime/live-irc.ts +4 -2
- package/src/runtime/parallel-utils.ts +2 -1
- package/src/runtime/plan-templates.ts +200 -0
- package/src/runtime/post-checks.ts +10 -3
- package/src/runtime/run-drift.ts +220 -0
- package/src/runtime/sandbox.ts +26 -20
- package/src/runtime/semaphore.ts +2 -1
- package/src/runtime/settings-store.ts +14 -2
- package/src/runtime/skill-effectiveness.ts +4 -2
- package/src/runtime/skill-instructions.ts +4 -1
- package/src/runtime/subagent-manager.ts +20 -2
- package/src/runtime/subprocess-tool-registry.ts +2 -2
- package/src/runtime/task-graph.ts +79 -0
- package/src/runtime/task-id.ts +148 -0
- package/src/runtime/task-packet.ts +13 -1
- package/src/runtime/task-runner/context-retrieval.ts +172 -0
- package/src/runtime/task-runner.ts +39 -1
- package/src/runtime/team-runner.ts +7 -0
- package/src/runtime/usage-tracker.ts +4 -2
- package/src/runtime/verification-gates.ts +36 -9
- package/src/state/contracts.ts +2 -1
- package/src/state/event-log.ts +16 -5
- package/src/state/hook-instinct-bridge.ts +2 -1
- package/src/state/locks.ts +9 -2
- package/src/state/memory-store.ts +244 -0
- package/src/state/observation-store.ts +177 -0
- package/src/state/state-store.ts +4 -2
- package/src/state/task-claims.ts +9 -2
- package/src/tools/safe-bash.ts +69 -20
- package/src/types/new-api-types.ts +10 -5
- package/src/ui/keybinding-map.ts +2 -1
- package/src/ui/run-action-dispatcher.ts +2 -1
- package/src/ui/status-colors.ts +2 -1
- package/src/ui/syntax-highlight.ts +2 -1
- package/src/ui/tool-render.ts +13 -3
- package/src/utils/fingerprint.ts +183 -0
- package/src/utils/fs-watch.ts +4 -2
- package/src/utils/gh-protocol.ts +2 -1
- package/src/utils/safe-paths.ts +6 -0
- package/src/workflows/discover-workflows.ts +5 -1
- package/src/workflows/intermediate-store.ts +173 -0
- package/src/workflows/workflow-config.ts +8 -0
- package/src/worktree/cleanup.ts +8 -5
- package/src/worktree/worktree-manager.ts +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,70 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.6.1] — Post-v0.6.0 Security Hardening + Test Coverage (2026-06-04)
|
|
4
|
+
|
|
5
|
+
### Highlights
|
|
6
|
+
- **42+ security issues fixed** — 7 CRITICAL, 10 HIGH, 11 MEDIUM, 14 post-restart review findings
|
|
7
|
+
- **~1,900 new tests** across 113+ test files — total suite now ~4,600 tests
|
|
8
|
+
- **38 dead exports cleaned** across 19 modules
|
|
9
|
+
- **12 `any` types replaced** with proper TypeScript types
|
|
10
|
+
- **Full battle-testing** — 2 Pi restart cycles, all team types, management operations verified
|
|
11
|
+
|
|
12
|
+
### Security Fixes (CRITICAL)
|
|
13
|
+
- `async-runner.ts`: Environment variable leak in child process — sanitized with `sanitizeEnvSecrets()`
|
|
14
|
+
- `verification-gates.ts`: Shell injection via user-controlled strings — switched to `execFileSync`
|
|
15
|
+
- `sandbox.ts`: `String.fromCharCode` bypass — added `constructor` to `FORBIDDEN_PATTERNS`
|
|
16
|
+
- `locks.ts`: Timing-unsafe comparison on lock tokens — replaced with constant-time compare
|
|
17
|
+
- `event-log.ts`: Request IDs logged in plaintext — now hashed before logging
|
|
18
|
+
- `team-runner.ts`: Missing heartbeat for long-running tasks — added 30s heartbeat writer
|
|
19
|
+
- `worktree-manager.ts`: Environment secrets leaked to git subprocesses — `sanitizeEnvSecrets()`
|
|
20
|
+
|
|
21
|
+
### Security Fixes (HIGH)
|
|
22
|
+
- `preStepScript` symlink traversal — `fs.realpathSync` before path containment check
|
|
23
|
+
- `childEnvAllowList` wildcard patterns (`LC_*`, `XDG_*`) could leak secrets
|
|
24
|
+
- Event log sync/async race condition — route sync `appendEvent` through async queue
|
|
25
|
+
- Subagent record validation — `sanitizePersistedRecord()` with allow-listed fields
|
|
26
|
+
- Verification gate redirect — allow single `>` for `2>&1`, block `>>` and `<[^&]`
|
|
27
|
+
- `allowPatterns` validation — reject patterns matching empty strings
|
|
28
|
+
|
|
29
|
+
### Security Fixes (MEDIUM)
|
|
30
|
+
- `logInternalError` import paths normalized across all modules
|
|
31
|
+
- `Object.freeze()` narrowing fix — use `Readonly<{...}>` explicit types
|
|
32
|
+
- NTFS mtime granularity — write-first, `utimes`-after for cache invalidation
|
|
33
|
+
- Windows path separators — platform-agnostic assertions in tests
|
|
34
|
+
- `executeUnchecked` visibility — `__test_executeUnchecked` export pattern
|
|
35
|
+
- `seedPaths` containment — `normalizeSeedPaths()` validates paths stay within `repoRoot`
|
|
36
|
+
|
|
37
|
+
### Code Quality
|
|
38
|
+
- 38 dead/unused exports removed across 19 source modules
|
|
39
|
+
- 12 `any` types replaced with proper interfaces
|
|
40
|
+
- `enforceLabelCap` MRU correctness — `delete`-then-`set` to maintain Map insertion order
|
|
41
|
+
- `readIfSmall` bounded reads — `Buffer.alloc` + `fs.readSync` instead of `readFileSync`
|
|
42
|
+
|
|
43
|
+
### Test Coverage
|
|
44
|
+
- 113 new test files, ~1,900 new test cases
|
|
45
|
+
- Modules now covered: config, extension, workflow, subagent, observability, runtime, graph,
|
|
46
|
+
heartbeat, permissions, state, locks, event-log, safe-bash, sandbox, verification-gates,
|
|
47
|
+
async-runner, team-runner, background-runner, worktree, fingerprint, BM25 search, and more
|
|
48
|
+
- Windows CI verified: path separators, `npx.cmd` resolution, NTFS mtime all pass
|
|
49
|
+
- Test runner wrapper (`scripts/test-runner.mjs`) ensures non-zero exit on failures
|
|
50
|
+
|
|
51
|
+
### Stats
|
|
52
|
+
- Test suite: ~4,600 pass, 0 fail
|
|
53
|
+
- TypeScript: 0 errors
|
|
54
|
+
- Lines added since v0.6.0: 22,519 (742 src + 21,777 test)
|
|
55
|
+
- Files changed: 204
|
|
56
|
+
- Security issues fixed: 42+
|
|
57
|
+
- Audit rounds: 42 (including post-v0.6.0 battle-testing)
|
|
58
|
+
|
|
59
|
+
## [0.6.0] — Source Tour Patterns + 15 New Modules (2026-06-03)
|
|
60
|
+
|
|
61
|
+
### Highlights
|
|
62
|
+
- **15 upstream patterns implemented** from 63-repository source tour
|
|
63
|
+
- **10 new source modules** (2,267 LOC): chain-parser, run-drift, intercom-bridge,
|
|
64
|
+
plan-templates, task-id, context-retrieval, intermediate-store, fingerprint,
|
|
65
|
+
memory-store, observation-store
|
|
66
|
+
- **37 skills reviewed** with origin fields, all passing validation
|
|
67
|
+
|
|
3
68
|
## [0.5.22] — Remaining Issues from Ultimate Sweep (2026-06-03)
|
|
4
69
|
|
|
5
70
|
### Highlights
|
|
@@ -1401,3 +1466,37 @@ correctness+error-handling, and performance+architecture audits across 77 source
|
|
|
1401
1466
|
|
|
1402
1467
|
### CI
|
|
1403
1468
|
- `.github/workflows/ci.yml`: typecheck step re-enabled (was disabled since v0.3.x)
|
|
1469
|
+
|
|
1470
|
+
## [0.6.0] — Source Tour Patterns Implementation (2026-06-04)
|
|
1471
|
+
|
|
1472
|
+
### Highlights
|
|
1473
|
+
- **15 patterns** implemented from 63-repo source tour (2,267 LOC)
|
|
1474
|
+
- All patterns pass TypeScript strict mode with 0 errors
|
|
1475
|
+
- 37 skills (including new council skill)
|
|
1476
|
+
|
|
1477
|
+
### Tier 1 — Quick Wins
|
|
1478
|
+
- **Council skill** (Pattern 5): 3 adversarial roles for critical decisions
|
|
1479
|
+
- **6 lifecycle hooks** (Pattern 12): after_run_complete, after_task_complete, session hooks
|
|
1480
|
+
- **3-tier convention** (Pattern 13): Command→Agent→Skill documentation + effort field
|
|
1481
|
+
- **Pre-step scripts** (Pattern 2): Deterministic scripts before LLM dispatch
|
|
1482
|
+
- **Chain DSL parser** (Pattern 8): step1 -> parallel(step2, step3) -> step4
|
|
1483
|
+
|
|
1484
|
+
### Tier 2 — Medium-Term
|
|
1485
|
+
- **DAG enhancements** (Pattern 7): findBlockedTasks, getBlockingTasks, topologicalSort
|
|
1486
|
+
- **Drift detection** (Pattern 10): 5 detectors, 2-pass reconciliation
|
|
1487
|
+
- **Hash-based task IDs** (Pattern 11): Base36 + adaptive length + hierarchical
|
|
1488
|
+
- **Iterative retrieval** (Pattern 6): Score → converge → refine loop
|
|
1489
|
+
- **Intercom bridge** (Pattern 9): Worker→orchestrator escalation queue
|
|
1490
|
+
- **Plan templates** (Pattern 15): Built-in standard-review and full-implementation
|
|
1491
|
+
|
|
1492
|
+
### Tier 3 — Long-Term
|
|
1493
|
+
- **Phase-gated intermediates** (Pattern 1): Disk-persistent step outputs
|
|
1494
|
+
- **Incremental fingerprinting** (Pattern 3): Content hash + structural signature
|
|
1495
|
+
- **4-tier memory** (Pattern 4): Working→Episodic→Semantic→Procedural with Ebbinghaus decay
|
|
1496
|
+
- **Observation system** (Pattern 14): Capture→compress→re-inject with privacy tags
|
|
1497
|
+
|
|
1498
|
+
### Stats
|
|
1499
|
+
- Test suite: 2698 pass + 1 skip, 0 fail
|
|
1500
|
+
- TypeScript: 0 errors
|
|
1501
|
+
- Skills: 37/37 PASS
|
|
1502
|
+
- New modules: 11 files, 2,267 LOC
|
package/README.md
CHANGED
|
@@ -9,22 +9,24 @@ npm: pi-crew
|
|
|
9
9
|
repo: https://github.com/baphuongna/pi-crew
|
|
10
10
|
```
|
|
11
11
|
|
|
12
|
-
**v0.
|
|
12
|
+
**v0.6.1**: See [CHANGELOG.md](CHANGELOG.md).
|
|
13
13
|
|
|
14
|
-
### Security highlights (v0.
|
|
14
|
+
### Security highlights (v0.6.1)
|
|
15
15
|
|
|
16
|
-
- **
|
|
17
|
-
- **
|
|
18
|
-
- **
|
|
19
|
-
- **Shell injection
|
|
16
|
+
- **42+ security issues fixed** — 7 CRITICAL, 10 HIGH, 11 MEDIUM, 14 post-restart findings
|
|
17
|
+
- **Timing-safe token comparison** — constant-time compare for lock tokens and request IDs
|
|
18
|
+
- **Environment leak prevention** — `sanitizeEnvSecrets()` on all child process spawns
|
|
19
|
+
- **Shell injection hardened** — `execFileSync` with array args; blocked `String.fromCharCode` bypass
|
|
20
|
+
- **ReDoS-free secret redaction** — linear-time scanning in `redaction.ts`
|
|
21
|
+
- **Sandbox prototype isolation** — `Object.freeze` scoped to VM context; `constructor` pattern blocked
|
|
22
|
+
- **Symlink traversal prevention** — `fs.realpathSync` before path containment checks
|
|
20
23
|
- **Safe-bash line-continuation hardening** — `$\n(evil)` command substitution bypass blocked
|
|
21
|
-
- **Sandbox prototype isolation** — `Object.freeze` scoped to VM context (not host process)
|
|
22
24
|
- **Path traversal mitigated** — `resolveContainedPath`/`resolveRealContainedPath` across all file ops
|
|
23
|
-
- **TOCTOU-free file ops** — atomic `mkdirSync` in `crew-init.ts`; `realpath`-based path validation
|
|
24
25
|
- **Memory leaks capped** — Maps, Sets, arrays bounded with eviction across all modules
|
|
25
|
-
- **
|
|
26
|
-
- **
|
|
27
|
-
-
|
|
26
|
+
- **Event log race conditions fixed** — sync/async queue unification
|
|
27
|
+
- **Subagent record sanitization** — allow-listed field persistence
|
|
28
|
+
- **~1,900 new tests**, 113 test files — total suite ~4,600 tests, 0 failures
|
|
29
|
+
- **42+ audit rounds, 160+ issues fixed** across all severity levels
|
|
28
30
|
|
|
29
31
|
See [SECURITY-ISSUES.md](SECURITY-ISSUES.md) for the full list (SEC-001 – SEC-007 all marked fixed).
|
|
30
32
|
|
|
package/docs/patterns/command-agent-skill.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Command → Agent → Skill: 3-Tier Pattern
|
|
2
|
+
|
|
3
|
+
> **Origin**: `source/claude-code-best-practice/CLAUDE.md`
|
|
4
|
+
> **Applicable to**: pi-crew v0.5.25+
|
|
5
|
+
|
|
6
|
+
## The 3 Tiers
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
User invokes → [Command] → [Agent] → [Skill]
|
|
10
|
+
entry worker reusable
|
|
11
|
+
point + tools capability
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
### Tier 1: Command (Entry Point)
|
|
15
|
+
- Maps user intent to an agent
|
|
16
|
+
- Defined in workflow `.md` files as a step
|
|
17
|
+
- Example: `team action='run' team='review'`
|
|
18
|
+
|
|
19
|
+
### Tier 2: Agent (Specialized Worker)
|
|
20
|
+
- Has a system prompt, model, tools, skills, effort level
|
|
21
|
+
- Defined as `.md` file with YAML frontmatter in `agents/` directory
|
|
22
|
+
- Example:
|
|
23
|
+
|
|
24
|
+
```markdown
|
|
25
|
+
---
|
|
26
|
+
name: security-reviewer
|
|
27
|
+
description: Chief Security Officer who finds OWASP Top 10 threats
|
|
28
|
+
tools: read, bash, edit
|
|
29
|
+
model: claude-sonnet-4-20250514
|
|
30
|
+
effort: high
|
|
31
|
+
skills: safe-bash, security-review
|
|
32
|
+
maxTurns: 30
|
|
33
|
+
contextMode: fresh
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
You are a Chief Security Officer...
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Tier 3: Skill (Reusable Capability)
|
|
40
|
+
- A `SKILL.md` file with instructions + operating rules
|
|
41
|
+
- Injected into agent context at dispatch time
|
|
42
|
+
- Example: `skills/safe-bash/SKILL.md`
|
|
43
|
+
|
|
44
|
+
## How to Compose
|
|
45
|
+
|
|
46
|
+
1. **Define the skill** — Create `skills/my-skill/SKILL.md`
|
|
47
|
+
2. **Define the agent** — Create `agents/my-agent.md` referencing the skill
|
|
48
|
+
3. **Define the workflow** — Create `workflows/my-workflow.workflow.md` referencing the agent as a step
|
|
49
|
+
|
|
50
|
+
```yaml
|
|
51
|
+
# workflows/my-workflow.workflow.md
|
|
52
|
+
steps:
|
|
53
|
+
- name: analyze
|
|
54
|
+
agent: my-agent
|
|
55
|
+
# The agent loads my-skill automatically
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Agent YAML Frontmatter Reference
|
|
59
|
+
|
|
60
|
+
| Field | Type | Description |
|
|
61
|
+
|-------|------|-------------|
|
|
62
|
+
| `name` | string | Agent identifier |
|
|
63
|
+
| `description` | string | One-line description for routing |
|
|
64
|
+
| `tools` | csv | Tools the agent can use |
|
|
65
|
+
| `model` | string | Model override |
|
|
66
|
+
| `skills` | csv | Skills to inject |
|
|
67
|
+
| `effort` | `low`/`medium`/`high` | Work effort level |
|
|
68
|
+
| `maxTurns` | number | Maximum conversation turns |
|
|
69
|
+
| `contextMode` | `fresh`/`fork` | Context inheritance |
|
|
70
|
+
| `loadMode` | `essential`/`lean` | Tool loading strategy |
|
|
71
|
+
| `thinking` | string | Thinking level override |
|
package/package.json
CHANGED
|
package/skills/council/SKILL.md
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: council
|
|
3
|
+
description: >
|
|
4
|
+
Spawn 3 adversarial subagents (Skeptic, Pragmatist, Critic) to evaluate a decision,
|
|
5
|
+
architecture choice, or plan. Anti-anchoring: each role receives ONLY the question,
|
|
6
|
+
not conversation history. Aggregates votes into consensus recommendation with dissent tracking.
|
|
7
|
+
Use when facing critical decisions, architecture choices, security tradeoffs, or plan reviews
|
|
8
|
+
where single-perspective analysis is insufficient.
|
|
9
|
+
origin: ECC/skills/council
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# Council Pattern — Adversarial Multi-Perspective Decision Making
|
|
13
|
+
|
|
14
|
+
## When to Use
|
|
15
|
+
|
|
16
|
+
- Evaluating architecture decisions with significant tradeoffs
|
|
17
|
+
- Reviewing security-sensitive design choices
|
|
18
|
+
- Validating implementation plans before execution
|
|
19
|
+
- Resolving ambiguity where multiple valid approaches exist
|
|
20
|
+
- Deciding whether to build, buy, or extend
|
|
21
|
+
|
|
22
|
+
## Prerequisites
|
|
23
|
+
|
|
24
|
+
- A clearly formulated question or decision to evaluate
|
|
25
|
+
- Sufficient context about the system for meaningful analysis
|
|
26
|
+
|
|
27
|
+
## Operating Rules
|
|
28
|
+
|
|
29
|
+
1. You MUST spawn exactly 3 subagents with isolated context (fresh, not forked)
|
|
30
|
+
2. Each subagent receives ONLY the question — NO conversation history (anti-anchoring)
|
|
31
|
+
3. You MUST NOT influence any subagent's analysis direction
|
|
32
|
+
4. You MUST record all 3 votes before forming consensus
|
|
33
|
+
5. You MUST include dissent in the final recommendation
|
|
34
|
+
|
|
35
|
+
## Workflow
|
|
36
|
+
|
|
37
|
+
### Step 1: Formulate the Question
|
|
38
|
+
|
|
39
|
+
Write a clear, neutral question that includes:
|
|
40
|
+
- The decision to be made
|
|
41
|
+
- Relevant constraints (performance, security, timeline)
|
|
42
|
+
- Available options (if known)
|
|
43
|
+
- What "success" looks like
|
|
44
|
+
|
|
45
|
+
DO NOT bias the question toward any particular answer.
|
|
46
|
+
|
|
47
|
+
### Step 2: Spawn 3 Council Members
|
|
48
|
+
|
|
49
|
+
Launch 3 parallel subagents with these EXACT roles:
|
|
50
|
+
|
|
51
|
+
**Skeptic** (Goal: Find flaws):
|
|
52
|
+
```
|
|
53
|
+
You are the Skeptic on a council evaluating: [QUESTION]
|
|
54
|
+
|
|
55
|
+
Your role: Find every possible flaw, risk, and failure mode.
|
|
56
|
+
- Challenge assumptions
|
|
57
|
+
- Identify edge cases that break the proposed approach
|
|
58
|
+
- Focus on what could go WRONG
|
|
59
|
+
- Rate your confidence (0.0-1.0) and give a PRO/CON/ABSTAIN position
|
|
60
|
+
- Provide your top 3 risks
|
|
61
|
+
|
|
62
|
+
Output format:
|
|
63
|
+
Position: PRO | CON | ABSTAIN
|
|
64
|
+
Confidence: 0.0-1.0
|
|
65
|
+
Reasoning: [your analysis]
|
|
66
|
+
Top 3 Risks: [list]
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Pragmatist** (Goal: Evaluate tradeoffs):
|
|
70
|
+
```
|
|
71
|
+
You are the Pragmatist on a council evaluating: [QUESTION]
|
|
72
|
+
|
|
73
|
+
Your role: Weigh practical tradeoffs objectively.
|
|
74
|
+
- Consider implementation cost, maintenance burden, team impact
|
|
75
|
+
- Evaluate time-to-value and opportunity cost
|
|
76
|
+
- Compare against realistic alternatives
|
|
77
|
+
- Rate your confidence (0.0-1.0) and give a PRO/CON/ABSTAIN position
|
|
78
|
+
|
|
79
|
+
Output format:
|
|
80
|
+
Position: PRO | CON | ABSTAIN
|
|
81
|
+
Confidence: 0.0-1.0
|
|
82
|
+
Reasoning: [your analysis]
|
|
83
|
+
Alternatives Considered: [list]
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Critic** (Goal: Stress-test reasoning):
|
|
87
|
+
```
|
|
88
|
+
You are the Critic on a council evaluating: [QUESTION]
|
|
89
|
+
|
|
90
|
+
Your role: Stress-test the logical foundations of each possible answer.
|
|
91
|
+
- Identify logical fallacies in common arguments for/against
|
|
92
|
+
- Check if the question itself contains hidden assumptions
|
|
93
|
+
- Evaluate whether the stated constraints are real or assumed
|
|
94
|
+
- Rate your confidence (0.0-1.0) and give a PRO/CON/ABSTAIN position
|
|
95
|
+
|
|
96
|
+
Output format:
|
|
97
|
+
Position: PRO | CON | ABSTAIN
|
|
98
|
+
Confidence: 0.0-1.0
|
|
99
|
+
Reasoning: [your analysis]
|
|
100
|
+
Hidden Assumptions: [list]
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Step 3: Aggregate Votes
|
|
104
|
+
|
|
105
|
+
Collect all 3 responses. Compute consensus:
|
|
106
|
+
|
|
107
|
+
| Vote Pattern | Consensus Level | Action |
|
|
108
|
+
|---|---|---|
|
|
109
|
+
| 3 PRO | **Strong accept** | Proceed with high confidence |
|
|
110
|
+
| 2 PRO, 1 CON | **Weak accept** | Proceed, but address CON dissent |
|
|
111
|
+
| 2 PRO, 1 ABSTAIN | **Accept with uncertainty** | Proceed, investigate ABSTAIN concerns |
|
|
112
|
+
| 1 PRO, 1 CON, 1 ABSTAIN | **No consensus** | Reformulate question or gather more data |
|
|
113
|
+
| 2 CON, 1 PRO | **Weak reject** | Do not proceed; explore alternatives |
|
|
114
|
+
| 3 CON | **Strong reject** | Reject; fundamentally rethink approach |
|
|
115
|
+
|
|
116
|
+
### Step 4: Output Recommendation
|
|
117
|
+
|
|
118
|
+
```markdown
|
|
119
|
+
## Council Decision: [Question Summary]
|
|
120
|
+
|
|
121
|
+
### Votes
|
|
122
|
+
| Role | Position | Confidence |
|
|
123
|
+
|------|----------|------------|
|
|
124
|
+
| Skeptic | PRO/CON/ABSTAIN | 0.X |
|
|
125
|
+
| Pragmatist | PRO/CON/ABSTAIN | 0.X |
|
|
126
|
+
| Critic | PRO/CON/ABSTAIN | 0.X |
|
|
127
|
+
|
|
128
|
+
### Consensus: [STRONG ACCEPT | WEAK ACCEPT | ACCEPT WITH UNCERTAINTY | NO CONSENSUS | WEAK REJECT | STRONG REJECT]
|
|
129
|
+
|
|
130
|
+
### Recommendation
|
|
131
|
+
[One-paragraph synthesis]
|
|
132
|
+
|
|
133
|
+
### Key Insights
|
|
134
|
+
- [Best point from Skeptic]
|
|
135
|
+
- [Best point from Pragmatist]
|
|
136
|
+
- [Best point from Critic]
|
|
137
|
+
|
|
138
|
+
### Dissent
|
|
139
|
+
[Summary of any dissenting opinions and why they were overruled or remain unresolved]
|
|
140
|
+
|
|
141
|
+
### Action Items
|
|
142
|
+
- [ ] [Specific next step 1]
|
|
143
|
+
- [ ] [Specific next step 2]
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Anti-Patterns
|
|
147
|
+
|
|
148
|
+
- DO NOT spawn fewer than 3 roles
|
|
149
|
+
- DO NOT share one subagent's analysis with another (contamination)
|
|
150
|
+
- DO NOT phrase the question to favor a specific outcome
|
|
151
|
+
- DO NOT override the council's consensus without documented justification
|
|
152
|
+
- DO NOT use council for trivial decisions (wastes resources)
|
|
153
|
+
|
|
154
|
+
## Enforcement — Council Gate
|
|
155
|
+
|
|
156
|
+
Before finalizing a council result, verify:
|
|
157
|
+
|
|
158
|
+
- [ ] All 3 roles spawned with isolated (fresh) context
|
|
159
|
+
- [ ] Each role received ONLY the question, no prior conversation
|
|
160
|
+
- [ ] All 3 votes recorded with confidence scores
|
|
161
|
+
- [ ] Consensus level computed from vote pattern
|
|
162
|
+
- [ ] Dissent explicitly documented (not hidden)
|
|
163
|
+
- [ ] Recommendation includes actionable next steps
|
|
package/src/agents/agent-config.ts
CHANGED
|
@@ -36,6 +36,8 @@ export interface AgentConfig {
|
|
|
36
36
|
contextMode?: "fresh" | "fork";
|
|
37
37
|
/** Maximum turns for this agent. Overrides runtime config if set. */
|
|
38
38
|
maxTurns?: number;
|
|
39
|
+
/** Effort level for this agent. Controls how much work the agent puts in. */
|
|
40
|
+
effort?: "low" | "medium" | "high";
|
|
39
41
|
/** Tools to explicitly forbid for this agent. Takes precedence over allowedTools. */
|
|
40
42
|
disallowedTools?: string[];
|
|
41
43
|
disabled?: boolean;
|
|
@@ -67,7 +69,8 @@ export function getAgentSessionOptions(role: string): {
|
|
|
67
69
|
* @param agent - The agent configuration
|
|
68
70
|
* @param role - The role name to use for tool restrictions (defaults to agent.name)
|
|
69
71
|
*/
|
|
70
|
-
|
|
72
|
+
/** @internal */
|
|
73
|
+
function buildAgentSessionOptions(
|
|
71
74
|
agent: AgentConfig,
|
|
72
75
|
role?: string,
|
|
73
76
|
): {
|
|
package/src/agents/discover-agents.ts
CHANGED
|
@@ -376,6 +376,7 @@ function parseAgentFile(filePath: string, source: ResourceSource): AgentConfig |
|
|
|
376
376
|
defaultTools: frontmatter.defaultTools !== undefined ? parseCsv(frontmatter.defaultTools) ?? null : undefined,
|
|
377
377
|
contextMode: parseContextMode(frontmatter.contextMode),
|
|
378
378
|
maxTurns: (() => { const n = Number.parseInt(frontmatter.maxTurns, 10); return Number.isFinite(n) && n > 0 ? n : undefined; })(),
|
|
379
|
+
effort: frontmatter.effort === "low" || frontmatter.effort === "medium" || frontmatter.effort === "high" ? frontmatter.effort : undefined,
|
|
379
380
|
disabled: frontmatter.disabled === "true" || frontmatter.enabled === "false",
|
|
380
381
|
routing: triggers || useWhen || avoidWhen || cost || category ? { triggers, useWhen, avoidWhen, cost, category } : undefined,
|
|
381
382
|
};
|
|
package/src/benchmark/feedback-loop.ts
CHANGED
|
@@ -4,13 +4,15 @@
|
|
|
4
4
|
|
|
5
5
|
import type { RunMetrics } from "../state/run-metrics.ts";
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
/** @internal */
|
|
8
|
+
interface FeedbackLoopStats {
|
|
8
9
|
runsObserved: number;
|
|
9
10
|
avgSuccessRate: number;
|
|
10
11
|
recommendations: string[];
|
|
11
12
|
}
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
/** @internal */
|
|
15
|
+
class FeedbackLoop {
|
|
14
16
|
private runs: RunMetrics[] = [];
|
|
15
17
|
private static readonly MAX_RUNS = 1000;
|
|
16
18
|
|
|
package/src/extension/cross-extension-rpc.ts
CHANGED
|
@@ -32,6 +32,8 @@ function requestId(raw: unknown): string | undefined {
|
|
|
32
32
|
|
|
33
33
|
function reply(events: EventBusLike, channel: string, id: string | undefined, payload: RpcReply): void {
|
|
34
34
|
if (!id) return;
|
|
35
|
+
// SECURITY: Validate requestId format to prevent channel injection.
|
|
36
|
+
if (!/^[a-zA-Z0-9_-]+$/.test(id)) return;
|
|
35
37
|
events.emit(`${channel}:reply:${id}`, payload);
|
|
36
38
|
}
|
|
37
39
|
|
|
@@ -59,6 +61,36 @@ function isAllowedRpcOperation(operation: string): boolean {
|
|
|
59
61
|
return RPC_ALLOWED_OPERATIONS.has(operation);
|
|
60
62
|
}
|
|
61
63
|
|
|
64
|
+
// SECURITY (HIGH #4 fix): In-memory rate limiter for RPC run requests.
|
|
65
|
+
// Prevents any extension from spawning unlimited child processes.
|
|
66
|
+
const RPC_RATE_LIMIT_MAX = 5; // Max 5 RPC run requests...
|
|
67
|
+
const RPC_RATE_LIMIT_WINDOW_MS = 60_000; // ...per 60 seconds
|
|
68
|
+
const rpcRunTimestamps: number[] = [];
|
|
69
|
+
|
|
70
|
+
function checkRpcRateLimit(): { allowed: boolean; retryAfterMs?: number } {
|
|
71
|
+
const now = Date.now();
|
|
72
|
+
// Evict entries older than the window
|
|
73
|
+
const cutoff = now - RPC_RATE_LIMIT_WINDOW_MS;
|
|
74
|
+
while (rpcRunTimestamps.length > 0 && rpcRunTimestamps[0] < cutoff) {
|
|
75
|
+
rpcRunTimestamps.shift();
|
|
76
|
+
}
|
|
77
|
+
if (rpcRunTimestamps.length >= RPC_RATE_LIMIT_MAX) {
|
|
78
|
+
const oldestInWindow = rpcRunTimestamps[0];
|
|
79
|
+
const retryAfterMs = oldestInWindow + RPC_RATE_LIMIT_WINDOW_MS - now;
|
|
80
|
+
return { allowed: false, retryAfterMs: Math.max(retryAfterMs, 1000) };
|
|
81
|
+
}
|
|
82
|
+
return { allowed: true };
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
function recordRpcRun(): void {
|
|
86
|
+
rpcRunTimestamps.push(Date.now());
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/** Reset the RPC rate limiter. Used primarily for testing. */
|
|
90
|
+
export function resetRpcRateLimit(): void {
|
|
91
|
+
rpcRunTimestamps.length = 0;
|
|
92
|
+
}
|
|
93
|
+
|
|
62
94
|
function isAllowedRpcRunParams(params: TeamToolParamsValue): { ok: boolean; error?: string } {
|
|
63
95
|
// SECURITY: Require explicit intent for any RPC-initiated run creation.
|
|
64
96
|
// This prevents malicious extensions from spawning child Pi processes silently.
|
|
@@ -83,6 +115,12 @@ function on(events: EventBusLike, channel: string, handler: (raw: unknown) => vo
|
|
|
83
115
|
return typeof unsub === "function" ? unsub : () => {};
|
|
84
116
|
}
|
|
85
117
|
|
|
118
|
+
// SECURITY TRUST BOUNDARY: RPC channels (pi-crew:rpc:run, pi-crew:rpc:status,
|
|
119
|
+
// pi-crew:rpc:live-control) are accessible to any extension on the shared event
|
|
120
|
+
// bus. Mitigations applied: rate limiting (RPC_RATE_LIMIT_MAX), explicit intent
|
|
121
|
+
// requirement for runs, operation allowlist for live-control reads, and cwd
|
|
122
|
+
// containment validation. A full fix requires event-bus-level origin signing.
|
|
123
|
+
|
|
86
124
|
export function registerPiCrewRpc(events: EventBusLike | undefined, getCtx: () => ExtensionContext | undefined): PiCrewRpcHandle | undefined {
|
|
87
125
|
if (!events) return undefined;
|
|
88
126
|
const unsubs = [
|
|
@@ -90,6 +128,16 @@ export function registerPiCrewRpc(events: EventBusLike | undefined, getCtx: () =
|
|
|
90
128
|
on(events, "pi-crew:rpc:run", async (raw) => {
|
|
91
129
|
const id = requestId(raw);
|
|
92
130
|
try {
|
|
131
|
+
// SECURITY (HIGH #4 fix): Rate limit RPC run requests
|
|
132
|
+
const rateLimit = checkRpcRateLimit();
|
|
133
|
+
if (!rateLimit.allowed) {
|
|
134
|
+
reply(events, "pi-crew:rpc:run", id, {
|
|
135
|
+
success: false,
|
|
136
|
+
error: `RPC run rate limit exceeded. Max ${RPC_RATE_LIMIT_MAX} requests per ${RPC_RATE_LIMIT_WINDOW_MS / 1000}s. Retry after ${Math.ceil((rateLimit.retryAfterMs ?? 60000) / 1000)}s.`,
|
|
137
|
+
});
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
recordRpcRun();
|
|
93
141
|
const ctx = getCtx();
|
|
94
142
|
if (!ctx) throw new Error("No active pi-crew session context.");
|
|
95
143
|
// Validate payload: only allow known fields from TeamToolParamsValue
|
|
package/src/extension/registration/commands.ts
CHANGED
|
@@ -428,7 +428,8 @@ export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommand
|
|
|
428
428
|
if (selection.action === "agent-transcript" && await openTranscriptViewer(ctx, selection.runId)) continue;
|
|
429
429
|
if (selection.action === "agent-live" && await openLiveConversation(ctx, selection.runId)) continue;
|
|
430
430
|
if (selection.action === "agent-live") { await notifyCommandResult(ctx, commandText({ content: [{ type: "text", text: "No live agent found for this run." }] })); continue; }
|
|
431
|
-
const result = selection.action === "api" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-manifest" } }, teamCommandContext(ctx)) : selection.action === "agents" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "agent-dashboard" } }, teamCommandContext(ctx)) : selection.action === "mailbox" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-mailbox" } }, teamCommandContext(ctx)) : selection.action === "agent-events" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-events", limit: 50 } }, teamCommandContext(ctx)) : selection.action === "agent-output" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-output", maxBytes: 32_000 } }, teamCommandContext(ctx)) : selection.action === "agent-transcript" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-transcript" } }, teamCommandContext(ctx)) :
|
|
431
|
+
const result = selection.action === "api" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-manifest" } }, teamCommandContext(ctx)) : selection.action === "agents" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "agent-dashboard" } }, teamCommandContext(ctx)) : selection.action === "mailbox" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-mailbox" } }, teamCommandContext(ctx)) : selection.action === "agent-events" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-events", limit: 50 } }, teamCommandContext(ctx)) : selection.action === "agent-output" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-output", maxBytes: 32_000 } }, teamCommandContext(ctx)) : selection.action === "agent-transcript" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-transcript" } }, teamCommandContext(ctx)) : // eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
432
|
+
await handleTeamTool({ action: selection.action as any, runId: selection.runId }, teamCommandContext(ctx));
|
|
432
433
|
await notifyCommandResult(ctx, commandText(result));
|
|
433
434
|
return;
|
|
434
435
|
}
|
|
@@ -96,9 +96,11 @@ export function registerSubagentTools(pi: ExtensionAPI, subagentManager: Subagen
|
|
|
96
96
|
}
|
|
97
97
|
return foregroundResult;
|
|
98
98
|
},
|
|
99
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
99
100
|
renderCall(args: any, theme: any, context: any): any {
|
|
100
101
|
return renderAgentToolCall(args, theme, context);
|
|
101
102
|
},
|
|
103
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
102
104
|
renderResult(result: any, options: any, theme: any, context: any): any {
|
|
103
105
|
return renderAgentToolResult(result, options, theme, context);
|
|
104
106
|
},
|
|
@@ -105,9 +105,11 @@ export function registerTeamTool(pi: ExtensionAPI, deps: RegisterTeamToolDeps):
|
|
|
105
105
|
stopProgress.stop();
|
|
106
106
|
}
|
|
107
107
|
},
|
|
108
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
108
109
|
renderCall(args: any, theme: any, context: any): any {
|
|
109
110
|
return renderTeamToolCall(args, theme, context);
|
|
110
111
|
},
|
|
112
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
111
113
|
renderResult(result: any, options: any, theme: any, context: any): any {
|
|
112
114
|
return renderTeamToolResult(result, options, theme, context);
|
|
113
115
|
},
|
|
@@ -58,6 +58,7 @@ export async function openLiveConversation(ctx: ExtensionCommandContext, initial
|
|
|
58
58
|
const handle = liveAgents.find((h) => h.runId === selected.runId && (selected.taskId ? h.taskId === selected.taskId : true));
|
|
59
59
|
if (!handle) return false;
|
|
60
60
|
const theme = asCrewTheme({});
|
|
61
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
61
62
|
await ctx.ui.custom<undefined>((tui: any, _theme: any, _keybindings: any, done: (result: undefined) => void) => {
|
|
62
63
|
const columns = tui?.terminal?.columns ?? 80;
|
|
63
64
|
const rows = tui?.terminal?.rows ?? 24;
|
|
@@ -1,17 +1,29 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
import * as os from "node:os";
|
|
4
|
+
import * as crypto from "node:crypto";
|
|
4
5
|
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
|
5
6
|
import { writeArtifact } from "../state/artifact-store.ts";
|
|
6
7
|
import { readEvents, type TeamEvent } from "../state/event-log.ts";
|
|
7
8
|
import { redactSecrets } from "../utils/redaction.ts";
|
|
8
9
|
|
|
9
10
|
/** Replace absolute paths containing home directory with ~/ */
|
|
11
|
+
/**
 * Escape every character that has special meaning in a regular expression so
 * that `str` can be embedded in a `RegExp` source and matched literally.
 *
 * @param str - Arbitrary text (here: a filesystem path).
 * @returns `str` with each regex metacharacter prefixed by a backslash.
 */
function escapeRegex(str: string): string {
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Replace occurrences of the home directory in `str` with `~`.
 *
 * A match is only accepted when it starts at a path boundary, i.e. when it is
 * NOT directly preceded by a character that could belong to a file name
 * (letter, digit, `_`, `.` or `-`). This keeps paths such as
 * `/mnt/backup/home/alice` intact when `home` is `/home/alice`, while still
 * redacting paths that follow a quote, whitespace, `:`, `=`, `/` or the start
 * of the string. The previous boundary set (`^`, `:`, `=`, `/`) missed quoted
 * and whitespace-separated paths, which is how paths appear in JSON text and
 * in command lines.
 *
 * @param str - Text that may contain absolute paths.
 * @param home - Home directory to redact; an empty value leaves `str` untouched.
 * @returns `str` with boundary-anchored occurrences of `home` replaced by `~`.
 */
function redactHomePathInString(str: string, home: string): string {
  // An empty pattern would match at every boundary and sprinkle "~" everywhere.
  if (!home) return str;
  // Unicode-aware negative lookbehind: the character before the match must not be a file-name character.
  const atPathBoundary = new RegExp(`(?<![\\p{L}\\p{N}_.-])${escapeRegex(home)}`, "gu");
  return str.replace(atPathBoundary, "~");
}
|
|
20
|
+
|
|
21
|
+
/**
 * Return a JSON-normalised deep copy of `obj` in which the current user's home
 * directory is rewritten to `~`, then hand the copy to `redactSecrets`.
 *
 * Redaction is applied to each decoded string (property names and values)
 * rather than to the serialised JSON text. In JSON text, backslashes and quotes
 * are escaped, so a home directory containing them (every Windows home) could
 * never match, and a path directly following an escape sequence such as `\n`
 * lost its boundary. Working on decoded strings also lets a value that begins
 * with the home directory match at the start-of-string boundary.
 *
 * @param obj - Any JSON-serialisable value.
 * @returns The redacted copy, typed as the input type.
 */
function redactHomePaths<T>(obj: T): T {
  const home = os.homedir();
  if (!home) return redactSecrets(obj) as T;

  // Recursively rebuild the structure, redacting every string on the way.
  const redactNode = (node: unknown): unknown => {
    if (typeof node === "string") return redactHomePathInString(node, home);
    if (Array.isArray(node)) return node.map(redactNode);
    if (node !== null && typeof node === "object") {
      return Object.fromEntries(
        Object.entries(node as Record<string, unknown>).map(
          ([key, value]): [string, unknown] => [redactHomePathInString(key, home), redactNode(value)],
        ),
      );
    }
    return node;
  };

  // Round-trip through JSON first so the result has the same plain-data shape as before.
  const plain: unknown = JSON.parse(JSON.stringify(obj));
  return redactSecrets(redactNode(plain)) as T;
}
|
|
17
29
|
|
|
@@ -37,6 +49,9 @@ export function exportRunBundle(manifest: TeamRunManifest, tasks: TeamTaskState[
|
|
|
37
49
|
events: safeEvents as TeamEvent[],
|
|
38
50
|
artifactPaths: safeManifest.artifacts.map((artifact) => artifact.path),
|
|
39
51
|
};
|
|
52
|
+
// Compute SHA-256 integrity hash of the bundle and store in manifest
|
|
53
|
+
const sha256 = crypto.createHash("sha256").update(JSON.stringify(bundle)).digest("hex");
|
|
54
|
+
(bundle.manifest as unknown as Record<string, unknown>).sha256 = sha256;
|
|
40
55
|
const json = writeArtifact(manifest.artifactsRoot, {
|
|
41
56
|
kind: "metadata",
|
|
42
57
|
relativePath: "export/run-export.json",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as path from "node:path";
|
|
3
|
+
import * as crypto from "node:crypto";
|
|
3
4
|
import { assertRunBundle } from "./run-bundle-schema.ts";
|
|
4
5
|
import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
|
|
5
6
|
import { DEFAULT_PATHS } from "../config/defaults.ts";
|
|
@@ -42,6 +43,21 @@ export function importRunBundle(cwd: string, bundlePath: string, scope: "project
|
|
|
42
43
|
if (!isContained) throw new Error(`Import path must be within project directory or crew root: ${resolvedPath}`);
|
|
43
44
|
const raw = JSON.parse(fs.readFileSync(resolvedPath, "utf-8")) as unknown;
|
|
44
45
|
assertRunBundle(raw);
|
|
46
|
+
|
|
47
|
+
// Integrity check: verify SHA-256 hash if present in manifest
|
|
48
|
+
const bundleJson = fs.readFileSync(resolvedPath, "utf-8");
|
|
49
|
+
const parsedForHash = JSON.parse(bundleJson) as { manifest?: { sha256?: string } };
|
|
50
|
+
if (parsedForHash.manifest?.sha256) {
|
|
51
|
+
const expectedHash = parsedForHash.manifest.sha256;
|
|
52
|
+
// Recompute hash by stringifying the bundle without the sha256 field
|
|
53
|
+
const { sha256: _sha256, ...manifestWithoutHash } = parsedForHash.manifest as Record<string, unknown> & { sha256?: string };
|
|
54
|
+
const bundleForHash = { ...parsedForHash, manifest: manifestWithoutHash };
|
|
55
|
+
const recomputedHash = crypto.createHash("sha256").update(JSON.stringify(bundleForHash)).digest("hex");
|
|
56
|
+
if (recomputedHash !== expectedHash) {
|
|
57
|
+
throw new Error(`Integrity check failed: SHA-256 mismatch. Expected ${expectedHash}, got ${recomputedHash}`);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
45
61
|
const runId = assertSafePathId("runId", raw.manifest.runId);
|
|
46
62
|
const importedAt = new Date().toISOString();
|
|
47
63
|
|
|
@@ -40,9 +40,13 @@ export function handleAnchorSet(
|
|
|
40
40
|
const cfg = params.config ?? {};
|
|
41
41
|
|
|
42
42
|
// Parse context from config
|
|
43
|
+
const POLLUTED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
|
|
43
44
|
const context: Record<string, unknown> = {};
|
|
44
45
|
if (cfg.context && typeof cfg.context === "object") {
|
|
45
|
-
|
|
46
|
+
const raw = cfg.context as Record<string, unknown>;
|
|
47
|
+
for (const [k, v] of Object.entries(raw)) {
|
|
48
|
+
if (!POLLUTED_KEYS.has(k)) context[k] = v;
|
|
49
|
+
}
|
|
46
50
|
}
|
|
47
51
|
if (cfg.key) {
|
|
48
52
|
// Single key shorthand
|