npm - pi-crew - Versions diffs - 0.5.2 → 0.5.6 - Mend

pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

package/CHANGELOG.md +183 -0
package/README.md +17 -1
package/docs/architecture.md +2 -0
package/docs/bugs/cross-session-notification-leakage.md +82 -0
package/docs/coding-agent-optimization.md +268 -0
package/docs/deep-review-report.md +384 -0
package/docs/distillation/cybersecurity-patterns.md +294 -0
package/docs/migration-v0.4-v0.5.md +208 -0
package/docs/optimization-plan.md +642 -0
package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
package/docs/pi-mono-opportunities.md +969 -0
package/docs/pi-mono-review.md +291 -0
package/docs/skills/REFERENCE.md +144 -0
package/package.json +12 -9
package/skills/artifact-analysis-loop/SKILL.md +302 -0
package/skills/async-worker-recovery/SKILL.md +19 -1
package/skills/child-pi-spawning/SKILL.md +19 -6
package/skills/context-artifact-hygiene/SKILL.md +19 -2
package/skills/delegation-patterns/SKILL.md +68 -3
package/skills/detection-pipeline-design/SKILL.md +285 -0
package/skills/event-log-tracing/SKILL.md +20 -6
package/skills/git-master/SKILL.md +20 -6
package/skills/hunting-investigation-loop/SKILL.md +401 -0
package/skills/incident-playbook-construction/SKILL.md +383 -0
package/skills/live-agent-lifecycle/SKILL.md +20 -6
package/skills/mailbox-interactive/SKILL.md +19 -6
package/skills/model-routing-context/SKILL.md +19 -1
package/skills/multi-perspective-review/SKILL.md +19 -4
package/skills/observability-reliability/SKILL.md +19 -2
package/skills/orchestration/SKILL.md +20 -2
package/skills/ownership-session-security/SKILL.md +20 -2
package/skills/pi-extension-lifecycle/SKILL.md +20 -2
package/skills/post-mortem/SKILL.md +7 -2
package/skills/read-only-explorer/SKILL.md +20 -6
package/skills/requirements-to-task-packet/SKILL.md +23 -3
package/skills/resource-discovery-config/SKILL.md +20 -2
package/skills/runtime-state-reader/SKILL.md +20 -2
package/skills/safe-bash/SKILL.md +21 -6
package/skills/scrutinize/SKILL.md +20 -2
package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
package/skills/security-review/SKILL.md +560 -0
package/skills/state-mutation-locking/SKILL.md +22 -2
package/skills/systematic-debugging/SKILL.md +8 -6
package/skills/threat-hypothesis-framework/SKILL.md +175 -0
package/skills/ui-render-performance/SKILL.md +20 -2
package/skills/verification-before-done/SKILL.md +17 -2
package/skills/widget-rendering/SKILL.md +21 -6
package/skills/workspace-isolation/SKILL.md +20 -6
package/skills/worktree-isolation/SKILL.md +20 -6
package/src/agents/agent-config.ts +40 -1
package/src/benchmark/benchmark-runner.ts +45 -0
package/src/benchmark/feedback-loop.ts +5 -0
package/src/config/config.ts +32 -5
package/src/config/role-tools.ts +82 -0
package/src/config/suggestions.ts +8 -0
package/src/config/types.ts +4 -0
package/src/extension/async-notifier.ts +10 -1
package/src/extension/crew-cleanup.ts +114 -0
package/src/extension/cross-extension-rpc.ts +1 -1
package/src/extension/notification-router.ts +18 -0
package/src/extension/register.ts +27 -19
package/src/extension/registration/subagent-tools.ts +1 -1
package/src/extension/team-tool/anchor.ts +201 -0
package/src/extension/team-tool/api.ts +2 -1
package/src/extension/team-tool/auto-summarize.ts +154 -0
package/src/extension/team-tool/run.ts +42 -7
package/src/extension/team-tool.ts +44 -2
package/src/hooks/registry.ts +1 -3
package/src/observability/event-bus.ts +69 -0
package/src/observability/event-to-metric.ts +0 -2
package/src/runtime/anchor-manager.ts +473 -0
package/src/runtime/async-runner.ts +8 -4
package/src/runtime/auto-summarize.ts +350 -0
package/src/runtime/background-runner.ts +10 -3
package/src/runtime/budget-tracker.ts +354 -0
package/src/runtime/chain-runner.ts +507 -0
package/src/runtime/child-pi.ts +123 -35
package/src/runtime/crash-recovery.ts +5 -4
package/src/runtime/crew-agent-runtime.ts +1 -0
package/src/runtime/custom-tools/irc-tool.ts +13 -0
package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
package/src/runtime/delivery-coordinator.ts +10 -3
package/src/runtime/dynamic-script-runner.ts +482 -0
package/src/runtime/foreground-control.ts +87 -17
package/src/runtime/handoff-manager.ts +589 -0
package/src/runtime/hidden-handoff.ts +424 -0
package/src/runtime/live-agent-manager.ts +20 -4
package/src/runtime/live-session-runtime.ts +39 -4
package/src/runtime/manifest-cache.ts +2 -1
package/src/runtime/model-resolver.ts +16 -4
package/src/runtime/phase-tracker.ts +373 -0
package/src/runtime/pi-args.ts +11 -1
package/src/runtime/pi-json-output.ts +31 -0
package/src/runtime/pipeline-runner.ts +514 -0
package/src/runtime/progress-tracker.ts +124 -0
package/src/runtime/retry-runner.ts +354 -0
package/src/runtime/sandbox.ts +252 -0
package/src/runtime/scheduler.ts +7 -2
package/src/runtime/skill-effectiveness.ts +473 -0
package/src/runtime/skill-instructions.ts +37 -3
package/src/runtime/subagent-manager.ts +1 -1
package/src/runtime/task-graph.ts +11 -1
package/src/runtime/task-runner.ts +92 -18
package/src/runtime/team-runner.ts +13 -12
package/src/runtime/tool-progress.ts +10 -3
package/src/runtime/verification-gates.ts +367 -0
package/src/schema/team-tool-schema.ts +37 -0
package/src/skills/discover-skills.ts +5 -0
package/src/state/active-run-registry.ts +9 -2
package/src/state/contracts.ts +9 -0
package/src/state/crew-init.ts +3 -3
package/src/state/decision-ledger.ts +98 -55
package/src/state/event-log-rotation.ts +2 -2
package/src/state/event-log.ts +144 -10
package/src/state/hook-instinct-bridge.ts +5 -5
package/src/state/mailbox.ts +10 -0
package/src/state/run-cache.ts +18 -8
package/src/state/state-store.ts +3 -1
package/src/state/types.ts +4 -0
package/src/tools/safe-bash-extension.ts +1 -0
package/src/tools/safe-bash.ts +152 -20
package/src/types/new-api-types.ts +34 -0
package/src/ui/agent-management-overlay.ts +5 -1
package/src/ui/crew-widget.ts +29 -15
package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
package/src/ui/powerbar-publisher.ts +101 -7
package/src/ui/tool-render.ts +15 -15
package/src/ui/transcript-cache.ts +13 -0
package/src/utils/bm25-search.ts +16 -8
package/src/utils/env-filter.ts +8 -5
package/src/utils/redaction.ts +169 -15
package/src/utils/session-utils.ts +52 -0
package/src/utils/sse-parser.ts +10 -1
package/src/worktree/cleanup.ts +6 -1
package/src/worktree/worktree-manager.ts +32 -13
package/workflows/chain.workflow.md +252 -0
package/workflows/pipeline.workflow.md +27 -0

package/skills/threat-hypothesis-framework/SKILL.md ADDED Viewed

@@ -0,0 +1,175 @@
+---
+name: threat-hypothesis-framework
+description: "Structured investigation using testable hypotheses."
+triggers:
+  - "hunt for"
+  - "investigate"
+  - "threat hypothesis"
+  - "test this pattern"
+  - "find evidence of"
+---
+# threat-hypothesis-framework
+Use this skill when conducting hypothesis-driven investigation and threat hunting.
+## Source
+Distilled from `building-threat-hunt-hypothesis-framework` (Anthropic Cybersecurity Skills) and generalized for software/codebase context.
+## When to Use
+- Proactively hunting for indicators of compromise in code
+- After threat intelligence indicates suspicious patterns
+- During incident investigation to scope extent
+- When EDR/logs alert on related indicators
+- During periodic security assessments
+## Workflow
+```markdown
+## Hypothesis Investigation Loop
+1. **Formulate** → Given [observed IOCs/patterns], hypothesize [attack scenario]
+2. **Identify** → List data sources: [files, commits, logs, configs]
+3. **Search** → Run queries across identified sources
+4. **Analyze** → Pattern match: [technique, indicator, artifact]
+5. **Validate** → Confirm with [secondary source, cross-reference]
+6. **Correlate** → Link findings to [broader campaign, actor]
+7. **Report** → Document: [finding, confidence, next_action]
+```
+## Hypothesis Structure
+```yaml
+hypothesis:
+  id: string                    # e.g., "HY-2026-001"
+  technique: string              # e.g., "credential-theft", "supply-chain"
+  description: string            # What we're testing
+  data_sources:
+    - type: [file|commit|log|config]
+      locations: [paths, globs]
+  search_patterns:
+    - pattern: string
+      type: [regex|AST|signature]
+  validation:
+    - method: string
+      expected_result: string
+  confidence_levels:
+    high: [confirmed by multiple sources]
+    medium: [single source, needs validation]
+    low: [heuristic match, requires investigation]
+```
+## Hunt Report Format
+```
+Hunt ID: [HY-runid-date-seq]
+Hypothesis: [what we're testing]
+Data Sources: [where we looked]
+Search Patterns: [what we searched for]
+Findings:
+  - File: [path]
+    Line: [number]
+    Evidence: [what matched]
+    Confidence: [High/Medium/Low]
+Correlation: [link to other findings]
+Next Actions:
+  - investigate: [further analysis needed]
+  - contain: [immediate action required]
+  - close: [false positive, no action]
+```
+## Investigation Examples
+### Example 1: Credential Detection Hunt
+```yaml
+hypothesis:
+  id: HY-2026-042
+  technique: hardcoded-credentials
+  description: Search for hardcoded secrets in codebase
+  data_sources:
+    - type: file
+      locations: ["**/*.ts", "**/*.js", "**/*.env"]
+  search_patterns:
+    - pattern: '(api[_-]?key|secret|token|password)\s*[=:]'
+      type: regex
+    - pattern: 'process\.env\.[A-Z_]+'
+      type: AST
+  validation:
+    - method: git history check
+      expected_result: No recent secret additions
+    - method: secret scanning tool
+      expected_result: Zero findings in main branch
+```
+### Example 2: Supply Chain Hunt
+```yaml
+hypothesis:
+  id: HY-2026-043
+  technique: dependency-confusion
+  description: Detect potential dependency confusion attacks
+  data_sources:
+    - type: file
+      locations: ["**/package.json", "**/requirements.txt"]
+  search_patterns:
+    - pattern: '"@private/.*"'
+      type: regex
+    - pattern: 'version.*>.*<.*9999999'
+      type: regex
+  validation:
+    - method: npm audit
+      expected_result: No anomalies
+    - method: typosquat check
+      expected_result: No similar package names
+```
+## Confidence Scoring
+| Level | Criteria | Action |
+|-------|----------|--------|
+| **High** | Confirmed by 2+ independent sources, exact match | Immediate action |
+| **Medium** | Single source, pattern match, needs validation | Investigate further |
+| **Low** | Heuristic match, possible false positive | Log and monitor |
+## Enforcement — Threat Hypothesis Framework Gate
+**Before reporting hunt findings, verify:**
+- [ ] Hypothesis clearly stated before search (not scattershot searching)
+- [ ] Data sources identified (files, commits, logs, configs)
+- [ ] Search patterns defined (regex, AST, signature)
+- [ ] Findings validated with secondary source or cross-reference
+- [ ] Confidence level assigned (High/Medium/Low) based on validation
+- [ ] Report includes: finding, confidence, next_action (investigate/contain/close)
+If ANY answer is NO → Stop. Complete hypothesis framework before reporting.
+## Anti-Patterns
+- **Don't** run hunt without clear hypothesis (scattershot searching)
+- **Don't** claim finding without validation (false positive risk)
+- **Don't** skip correlation step (missing broader context)
+- **Don't** report without confidence level (misleads stakeholders)
+## Tools
+| Tool | Purpose |
+|------|---------|
+| `rg` (ripgrep) | Pattern search in files |
+| `git log` | History investigation |
+| `semgrep` | AST-based pattern matching |
+| `npm audit` | Dependency vulnerability check |
+## Verification
+For hypothesis framework changes:
+```bash
+cd pi-crew
+npx tsc --noEmit
+node --experimental-strip-types --test test/unit/security-patterns.test.ts
+```
+*See also: `hunting-investigation-loop` skill for active hunting workflows.*

package/skills/ui-render-performance/SKILL.md CHANGED Viewed

@@ -1,8 +1,13 @@
 ---
 name: ui-render-performance
-description: Non-blocking Pi TUI render workflow. Use when changing widgets, powerbar/statusbar segments, dashboard panes, overlays, snapshot caches, or live UI refresh behavior.
+description: "Non-blocking Pi TUI render workflow."
+triggers:
+  - "widget render"
+  - "dashboard pane"
+  - "overlay update"
+  - "snapshot cache"
+  - "UI refresh"
 ---
 # ui-render-performance
 Use this skill for Pi/pi-crew TUI work.
@@ -23,6 +28,19 @@ Use this skill for Pi/pi-crew TUI work.
 - On session switch, cancel timers and ensure in-flight async preloads cannot update stale session UI.
 - Watch TTL interactions: a preload interval shorter than cache TTL prevents render-time refresh gaps.
+## Enforcement — UI Render Performance Gate
+**Before modifying widgets or UI rendering, verify:**
+- [ ] Render path is non-blocking (no fs calls, no network, no large JSON parsing)
+- [ ] All data preloaded async before first render
+- [ ] Snapshot cache TTL appropriate (500ms or less)
+- [ ] Render scheduler used for coalescing renders
+- [ ] Stale warnings filtered for terminal status (completed/failed/cancelled)
+- [ ] TTL interactions understood (preload interval < cache TTL)
+If ANY answer is NO → Stop. Fix render performance issues before proceeding.
 ## Anti-patterns
 - Do not call `loadConfig()`, `manifestCache.list()`, or `refreshIfStale()` repeatedly inside `renderTick()` unless backed by preloaded frame data.

package/skills/verification-before-done/SKILL.md CHANGED Viewed

@@ -1,8 +1,15 @@
 ---
 name: verification-before-done
-description: "Evidence before claims. Use before claiming work is complete, fixed, passing, reviewed, committed, or ready to hand off. Triggers: done, fixed, complete, ready to merge, can I close, is it working, verify this, check if it passes, all good, LGTM, ready to ship."
----
+description: "Evidence before claims."
+triggers:
+  - "done"
+  - "verify this"
+  - "is it working"
+  - "check if it passes"
+  - "ready to ship"
+---
 # verification-before-done
 Core principle: evidence before claims. A worker report, green-looking log, or previous run is not fresh verification.
@@ -64,3 +71,11 @@ Before finalizing any work, report:
 ## Red Flags
 Stop before saying done if you are using words like "should", "probably", "looks", "seems", "I think", or if you are trusting an agent report without checking evidence.
+## Anti-Patterns
+- **Don't** claim "tests pass" without running them in the current session
+- **Don't** trust agent reports without checking evidence yourself
+- **Don't** use fuzzy language like "seems", "probably", "looks like"
+- **Don't** skip providing verification commands for claims
+- **Don't** claim done if you're still using hypotheses instead of evidence

package/skills/widget-rendering/SKILL.md CHANGED Viewed

@@ -1,8 +1,14 @@
 ---
 name: widget-rendering
-description: Pi TUI crew widget data sources, display priority, and rendering performance. Use when debugging empty agents, ghost runs, or widget timing issues.
----
+description: "Pi TUI crew widget data sources, display priority, and rendering performance."
+triggers:
+  - "empty agent"
+  - "ghost run"
+  - "widget timing"
+  - "display priority"
+  - "snapshot cache"
+---
 # widget-rendering
 The crew widget (`src/ui/crew-widget.ts`) displays active runs and their agents in the Pi TUI. It must render synchronously at TTY refresh rate without blocking. Understanding the data sources and timing rules is essential for debugging display issues.
@@ -44,8 +50,6 @@ In-memory map from `live-agent-manager.ts`. Provides:
 **When used:** For completed agents, or when snapshot cache misses.
----
 ## Display Priority
 ```
@@ -66,8 +70,6 @@ for each active run:
       → apply linger rules (finishedAgents: 1min, errors: 2min)
 ```
----
 ## Active Runs Filtering
 `activeWidgetRuns()` determines which runs to show. Key filter: `isDisplayActiveRun(manifest, tasks)` from `process-status.ts`.
@@ -227,6 +229,19 @@ const TOOL_LABELS = {
 ---
+## Enforcement — Widget Rendering Gate
+**Before modifying widget rendering or display logic, verify:**
+- [ ] Render path is synchronous and non-blocking (no fs/network calls)
+- [ ] Display priority chain correct (liveAgents → snapshot cache → agents.json)
+- [ ] Ghost run filtering works (stale async PID + age > 30min hidden)
+- [ ] Stale handle eviction runs on every refresh (10min terminal handles removed)
+- [ ] Cache invalidation handles empty results (forces refresh on next tick)
+- [ ] Tool name extraction uses TOOL_LABELS for readable activity descriptions
+If ANY answer is NO → Stop. Fix widget rendering issues before proceeding.
 ## Anti-patterns
 - **Blocking render with fs calls**: Every `readFileSync`, `readdirSync`, `fs.statSync` in the render path causes frame drops. Preload everything async.

package/skills/workspace-isolation/SKILL.md CHANGED Viewed

@@ -1,8 +1,13 @@
 ---
 name: workspace-isolation
-description: "Workspace isolation boundaries. Use when ensuring agents from workspace A cannot access workspace B, or worktree-based parallel execution. Triggers: workspace isolation, cross-workspace access, escape boundary, worktree safety."
+description: "Workspace isolation boundaries."
+triggers:
+  - "workspace isolation"
+  - "cross-workspace access"
+  - "escape boundary"
+  - "worktree safety"
+  - "agent isolation"
 ---
 # workspace-isolation
 pi-crew enforces workspace isolation so that agents, runs, and live sessions from one project folder cannot be accessed from another. The workspace boundary is `manifest.cwd` — the directory where a run was initiated.
@@ -157,6 +162,19 @@ const DEFAULT_PATHS = {
 All paths are resolved relative to `manifest.cwd`, ensuring state stays under the project root.
+## Enforcement — Workspace Isolation Gate
+**Before performing cross-workspace operations, verify:**
+- [ ] workspaceId carried from manifest.cwd through all operations
+- [ ] Live agent operations filtered by workspaceId (list, steer, follow-up, stop, resume)
+- [ ] resolveContainedPath used (not startsWith) for path validation
+- [ ] resolveRealContainedPath used for symlink detection
+- [ ] Worktree paths under <repo-root>/.worktrees/ (never outside workspace)
+- [ ] Cross-session cancel/respond rejected (force=true only when explicit)
+If ANY answer is NO → Stop. Verify workspace isolation before proceeding.
 ## Anti-patterns
 - **Passing raw cwd without validation**: Always use `resolveContainedPath` to ensure paths stay under workspace root.
@@ -165,8 +183,6 @@ All paths are resolved relative to `manifest.cwd`, ensuring state stays under th
 - **Worktree name collision**: Use deterministic names from run/task IDs. Never accept user-controlled branch names.
 - **Dirty worktree removal**: Never force-remove worktrees with uncommitted changes unless explicitly confirmed.
----
 ## Source patterns
 - `src/extension/team-tool/api.ts` — workspaceId filter in list-live-agents, steer-agent, follow-up-agent, stop-agent, resume-agent
@@ -177,8 +193,6 @@ All paths are resolved relative to `manifest.cwd`, ensuring state stays under th
 - `src/worktree/worktree-manager.ts` — prepareTaskWorkspace, assertCleanLeader, linkNodeModulesIfPresent
 - `src/config/defaults.ts` — DEFAULT_PATHS (state under project root)
----
 ## Verification
 ```bash

package/skills/worktree-isolation/SKILL.md CHANGED Viewed

@@ -1,8 +1,13 @@
 ---
 name: worktree-isolation
-description: Conflict-safe git worktree workflow. Use when running parallel implementation workers, isolating risky edits, or cleaning up task worktrees.
+description: "Conflict-safe git worktree workflow."
+triggers:
+  - "create worktree"
+  - "parallel workers"
+  - "isolate edits"
+  - "cleanup worktree"
+  - "branch freshness"
 ---
 # worktree-isolation
 Use this skill for worktree-based execution or cleanup. Git worktrees create isolated working directories that allow parallel code-changing tasks without git conflicts.
@@ -177,6 +182,19 @@ If a task crashes mid-worktree:
 3. If run is failed/cancelled and worktree is dirty → report to operator
 4. If run is completed → safe to clean up
+## Enforcement — Worktree Isolation Gate
+**Before creating or cleaning up worktrees, verify:**
+- [ ] Leader repo is clean before creating worktrees (assertCleanLeader passes)
+- [ ] One owner per file/symbol (no two worktrees edit same file)
+- [ ] Worktree naming is deterministic from run/task IDs (no user-controlled fragments)
+- [ ] Branch freshness checked before reuse (base branch hasn't diverged)
+- [ ] Dirty worktrees preserved by default (force=true only when removal is explicitly confirmed)
+- [ ] Worktree paths under <repo-root>/.worktrees/ (never outside workspace)
+If ANY answer is NO → Stop. Verify worktree safety before proceeding.
 ## Anti-patterns
 - **Parallel editing same file**: Assign one owner per file. Use the task ID in branch names to track ownership.
@@ -185,8 +203,6 @@ If a task crashes mid-worktree:
 - **Storing worktrees outside workspace root**: All worktrees must be under `<repo-root>/.worktrees/`. Never store outside.
 - **Worktree name collision**: Use deterministic naming from run/task IDs, not user input.
----
 ## Source patterns
 - `src/worktree/worktree-manager.ts` — prepareTaskWorkspace, assertCleanLeader, linkNodeModulesIfPresent, sanitizeBranchPart
@@ -195,8 +211,6 @@ If a task crashes mid-worktree:
 - `src/runtime/team-runner.ts` — workspaceMode handling, worktree passed to task
 - `src/runtime/task-runner.ts` — worktreePath in task context
----
 ## Verification
 ```bash

package/src/agents/agent-config.ts CHANGED Viewed

@@ -1,4 +1,7 @@
-export type ResourceSource = "builtin" | "user" | "project" | "git";
+import type { RoleToolConfig } from "../config/role-tools.ts";
+import { getToolConfig } from "../config/role-tools.ts";
+export type ResourceSource = "builtin" | "user" | "project" | "git" | "dynamic";
 export interface RoutingMetadata {
 	triggers?: string[];
@@ -38,3 +41,39 @@ export interface AgentConfig {
 	disabled?: boolean;
 	override?: { source: "config"; path: string };
 }
+/**
+ * Look up the tool restrictions configured for a role.
+ *
+ * The original note on this function states it is used by child-pi to apply
+ * role-based tool restrictions.
+ *
+ * @param role - Role name passed to `getToolConfig`.
+ * @returns An object carrying the role's `tools` and `excludeTools` when at
+ *   least one of them is set; otherwise an empty object.
+ */
+export function getAgentSessionOptions(role: string): {
+	tools?: string[];
+	excludeTools?: string[];
+} {
+	const { tools, excludeTools }: RoleToolConfig = getToolConfig(role);
+	// Nothing configured for this role: hand back an empty options object.
+	if (!tools && !excludeTools) {
+		return {};
+	}
+	return { tools, excludeTools };
+}
+/**
+ * Compute the session tool restrictions that apply to an agent.
+ *
+ * @param agent - The agent configuration; its `name` is the fallback role.
+ * @param role - Optional explicit role name; when it is null or undefined,
+ *   `agent.name` is used instead.
+ * @returns The result of `getAgentSessionOptions` for the chosen role.
+ */
+export function buildAgentSessionOptions(
+	agent: AgentConfig,
+	role?: string,
+): {
+	tools?: string[];
+	excludeTools?: string[];
+} {
+	return getAgentSessionOptions(role ?? agent.name);
+}

package/src/benchmark/benchmark-runner.ts CHANGED Viewed

@@ -32,6 +32,45 @@ export interface BenchmarkResult {
 	cost: number;
 }
+/**
+ * Validate a command against an allowlist before it is handed to a shell.
+ *
+ * The command must start with one of the allowed prefixes (`pytest`, `grep`,
+ * `npm test`, `npx`) followed by a space. Everything after the first space
+ * must be free of shell metacharacters and of line breaks.
+ *
+ * @param command - The full command line to check.
+ * @throws Error if the command does not start with an allowed prefix, or if
+ *   the text after the first space contains a shell metacharacter, a newline,
+ *   or a carriage return.
+ */
+function validateCommand(command: string): void {
+  // Basic allowlist - must start with an allowed command followed by a space
+  const allowlist = /^(pytest|grep|npm test|npx) /;
+  if (!allowlist.test(command)) {
+    throw new Error(`Command not allowed: ${command}. Only pytest, grep, npm test, npx allowed.`);
+  }
+
+  // Everything after the first space is treated as the argument string.
+  const afterCommand = command.substring(command.indexOf(" ") + 1);
+
+  // One character class covers every construct the checks are meant to stop:
+  // separators (; & |), substitution ($ ` ( )), grouping ({ } [ ]),
+  // redirection (< >) and escapes (\). Line breaks are rejected as well,
+  // because a shell treats a newline as a command separator.
+  const forbidden = /[;&|`$(){}[\]<>\\\r\n]/;
+  if (forbidden.test(afterCommand)) {
+    throw new Error(`Shell metacharacters not allowed in command arguments`);
+  }
+}
 /**
  * Run a single benchmark task with tiered judges.
  * Tier 1: pytest (fast, deterministic)
@@ -49,6 +88,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
 			let output: string | undefined;
 			if (judge.type === "pytest" && judge.command) {
+				// Validate command before execution
+				validateCommand(judge.command);
 				// Tier 1: pytest - fast deterministic check
 				output = execSync(judge.command, {
 					timeout: 5000,
@@ -58,6 +99,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
 				// Look for pytest summary line with passed count
 				passed = output.includes("passed");
 			} else if (judge.type === "grep" && judge.pattern && judge.command) {
+				// Validate command before execution
+				validateCommand(judge.command);
 				// Tier 2: grep pattern matching
 				output = execSync(judge.command, {
 					timeout: 5000,
@@ -66,6 +109,8 @@ export async function runBenchmark(task: BenchmarkTask): Promise<BenchmarkResult
 				});
 				passed = output.includes(judge.pattern);
 			} else if (judge.type === "command" && judge.command) {
+				// Validate command before execution
+				validateCommand(judge.command);
 				// Tier 3: command execution
 				output = execSync(judge.command, {
 					timeout: 10000,

package/src/benchmark/feedback-loop.ts CHANGED Viewed

@@ -12,12 +12,17 @@ export interface FeedbackLoopStats {
 export class FeedbackLoop {
 	private runs: RunMetrics[] = [];
+	private static readonly MAX_RUNS = 1000;
 	/**
 	 * Record a run's metrics for learning.
 	 */
 	recordRun(metrics: RunMetrics): void {
 		this.runs.push(metrics);
+		// Bound memory use: once the history exceeds MAX_RUNS, keep only the
+		// most recent MAX_RUNS entries (slice with a negative start drops the oldest).
+		if (this.runs.length > FeedbackLoop.MAX_RUNS) {
+			this.runs = this.runs.slice(-FeedbackLoop.MAX_RUNS);
+		}
 	}
 	/**

package/src/config/config.ts CHANGED Viewed

@@ -300,6 +300,7 @@ function mergeConfig(
 	base: PiTeamsConfig,
 	override: PiTeamsConfig,
 ): PiTeamsConfig {
+	const warnings: string[] = [];
 	const merged: PiTeamsConfig = {
 		...base,
 		...withoutUndefined(override as Record<string, unknown>),
@@ -439,6 +440,15 @@ function mergeConfig(
 		};
 		if (Object.keys(merged.otlp.headers ?? {}).length === 0)
 			delete merged.otlp.headers;
+		// Validate OTLP headers for injection attacks (newlines, CR, null bytes)
+		const invalidHeaders: string[] = [];
+		for (const [k, v] of Object.entries(merged.otlp.headers ?? {})) {
+			if (/[\r\n\x00]/.test(String(v))) { invalidHeaders.push(k); }
+		}
+		if (invalidHeaders.length > 0) {
+			delete merged.otlp.headers;
+			warnings.push(`OTLP headers blocked due to invalid characters: ${invalidHeaders.join(", ")}`);
+		}
 	}
 	if (
 		merged.agents?.overrides &&
@@ -1171,6 +1181,10 @@ export function loadConfig(cwd?: string): LoadedPiTeamsConfig {
 	if (cwd) {
 		const projectPath = projectConfigPath(cwd);
 		const projectConfig = readOptionalConfig(projectPath);
+		// SECURITY FIX: Merge project config FIRST, then user config on top.
+		// This ensures user preferences always take precedence over project settings.
+		// Sensitive fields have already been sanitized by sanitizeProjectConfig.
+		let effectiveConfig = {};
 		if (projectConfig.exists) {
 			const projectSafeConfig = sanitizeProjectConfig(
 				projectPath,
@@ -1181,16 +1195,29 @@ export function loadConfig(cwd?: string): LoadedPiTeamsConfig {
 				...projectConfig.warnings,
 				...projectSafeConfig.warnings,
 			);
-			config = mergeConfig(config, projectSafeConfig.config);
+			effectiveConfig = mergeConfig(effectiveConfig, projectSafeConfig.config);
 		}
-		// `.pi/pi-crew.json` is the project-owned override file. If present and valid,
-		// it may override all pi-crew config fields, including agents.overrides.
-		// If missing or invalid, it is ignored and defaults/user config remain effective.
+		// User config always takes precedence over project config
+		effectiveConfig = mergeConfig(effectiveConfig, config);
+		config = effectiveConfig;
+		// `.pi/pi-crew.json` is the project-owned config file.
+		// SECURITY FIX: User config takes precedence over project-level `.pi/pi-crew.json`.
+		// This prevents malicious project configs from overriding user preferences.
 		const piCrewJsonPath = projectPiCrewJsonPath(cwd);
 		const piCrewJsonConfig = readOptionalConfig(piCrewJsonPath);
 		if (piCrewJsonConfig.exists) {
 			warnings.push(...piCrewJsonConfig.warnings);
-			config = mergeConfig(config, piCrewJsonConfig.config);
+			// Merge project config first, then user config on top
+			const projectPart = sanitizeProjectConfig(
+				piCrewJsonPath,
+				config,
+				piCrewJsonConfig.config,
+			);
+			warnings.push(...projectPart.warnings);
+			const mergedProject = mergeConfig(projectPart.config, config);
+			config = mergedProject;
 			paths.push(piCrewJsonPath);
 		}
 	}