npm - pi-crew - Versions diffs - 0.5.2 → 0.5.5 - Mend

pi-crew 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80)

package/CHANGELOG.md +67 -0
package/docs/bugs/cross-session-notification-leakage.md +82 -0
package/docs/coding-agent-optimization.md +268 -0
package/docs/deep-review-report.md +384 -0
package/docs/distillation/cybersecurity-patterns.md +294 -0
package/docs/migration-v0.4-v0.5.md +191 -0
package/docs/optimization-plan.md +642 -0
package/docs/pi-mono-opportunities.md +969 -0
package/docs/pi-mono-review.md +291 -0
package/docs/skills/REFERENCE.md +144 -0
package/package.json +7 -6
package/skills/artifact-analysis-loop/SKILL.md +302 -0
package/skills/async-worker-recovery/SKILL.md +19 -1
package/skills/child-pi-spawning/SKILL.md +19 -6
package/skills/context-artifact-hygiene/SKILL.md +19 -2
package/skills/delegation-patterns/SKILL.md +68 -3
package/skills/detection-pipeline-design/SKILL.md +285 -0
package/skills/event-log-tracing/SKILL.md +20 -6
package/skills/git-master/SKILL.md +20 -6
package/skills/hunting-investigation-loop/SKILL.md +401 -0
package/skills/incident-playbook-construction/SKILL.md +383 -0
package/skills/live-agent-lifecycle/SKILL.md +20 -6
package/skills/mailbox-interactive/SKILL.md +19 -6
package/skills/model-routing-context/SKILL.md +19 -1
package/skills/multi-perspective-review/SKILL.md +19 -4
package/skills/observability-reliability/SKILL.md +19 -2
package/skills/orchestration/SKILL.md +20 -2
package/skills/ownership-session-security/SKILL.md +20 -2
package/skills/pi-extension-lifecycle/SKILL.md +20 -2
package/skills/post-mortem/SKILL.md +7 -2
package/skills/read-only-explorer/SKILL.md +20 -6
package/skills/requirements-to-task-packet/SKILL.md +23 -3
package/skills/resource-discovery-config/SKILL.md +20 -2
package/skills/runtime-state-reader/SKILL.md +20 -2
package/skills/safe-bash/SKILL.md +21 -6
package/skills/scrutinize/SKILL.md +20 -2
package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
package/skills/security-review/SKILL.md +560 -0
package/skills/state-mutation-locking/SKILL.md +22 -2
package/skills/systematic-debugging/SKILL.md +8 -6
package/skills/threat-hypothesis-framework/SKILL.md +175 -0
package/skills/ui-render-performance/SKILL.md +20 -2
package/skills/verification-before-done/SKILL.md +17 -2
package/skills/widget-rendering/SKILL.md +21 -6
package/skills/workspace-isolation/SKILL.md +20 -6
package/skills/worktree-isolation/SKILL.md +20 -6
package/src/agents/agent-config.ts +40 -1
package/src/config/config.ts +22 -5
package/src/config/role-tools.ts +82 -0
package/src/config/types.ts +4 -0
package/src/extension/crew-cleanup.ts +114 -0
package/src/extension/register.ts +15 -3
package/src/extension/team-tool/run.ts +7 -7
package/src/observability/event-bus.ts +60 -0
package/src/runtime/background-runner.ts +8 -2
package/src/runtime/child-pi.ts +122 -34
package/src/runtime/crew-agent-runtime.ts +1 -0
package/src/runtime/foreground-control.ts +87 -17
package/src/runtime/pi-args.ts +11 -1
package/src/runtime/pi-json-output.ts +31 -0
package/src/runtime/progress-tracker.ts +124 -0
package/src/runtime/skill-effectiveness.ts +473 -0
package/src/runtime/skill-instructions.ts +37 -3
package/src/runtime/task-runner.ts +91 -17
package/src/runtime/team-runner.ts +11 -11
package/src/runtime/tool-progress.ts +10 -3
package/src/runtime/verification-gates.ts +367 -0
package/src/schema/team-tool-schema.ts +7 -0
package/src/state/decision-ledger.ts +92 -43
package/src/state/event-log.ts +136 -10
package/src/state/hook-instinct-bridge.ts +5 -5
package/src/state/state-store.ts +3 -1
package/src/state/types.ts +4 -0
package/src/types/new-api-types.ts +34 -0
package/src/ui/agent-management-overlay.ts +5 -1
package/src/ui/crew-widget.ts +29 -15
package/src/ui/powerbar-publisher.ts +100 -7
package/src/ui/tool-render.ts +15 -15
package/src/utils/session-utils.ts +52 -0
package/src/worktree/worktree-manager.ts +32 -13

package/docs/pi-mono-review.md ADDED Viewed

@@ -0,0 +1,291 @@
+# pi-mono Review: Full May 2026 Analysis
+**Date:** 2026-05-28
+**Reviewed:** Direct source reading of `packages/agent/`, `packages/ai/`, `packages/coding-agent/`
+**Source:** `origin/main` (up to date)
+> **Focused coding-agent analysis:** See [`docs/coding-agent-optimization.md`](./coding-agent-optimization.md) for actionable optimization opportunities for pi-crew.
+---
+## Executive Summary
+**No breaking changes found.** The entire May refactor is additive or internal. Both `Agent` (legacy harness) and `AgentHarness` (new harness) coexist. pi-crew's usage of the `Agent` class via `child-pi.ts` spawning is **fully compatible**.
+---
+## 1. Architecture: Two Harnesses Coexist
+### Legacy Harness: `Agent` class (`packages/agent/src/agent.ts`)
+```typescript
+// Still the primary harness used by coding-agent
+export class Agent {
+  async prompt(input: string | AgentMessage | AgentMessage[], images?: ImageContent[]): Promise<void>
+  abort(): void
+  subscribe(listener: (event: AgentEvent, signal: AbortSignal) => void): () => void
+  // ... existing API unchanged
+}
+```
+This is what pi-crew's `child-pi.ts` spawns — **no breaking changes**.
+### New Harness: `AgentHarness` class (`packages/agent/src/harness/agent-harness.ts`)
+```typescript
+// New harness, built on top of runAgentLoop, with richer APIs
+export class AgentHarness {
+  async prompt(text: string, options?: AgentHarnessPromptOptions): Promise<AssistantMessage>
+  async steer(text: string): Promise<void>
+  async setModel(model: Model<any>): Promise<void>
+  async setThinkingLevel(level: ThinkingLevel): Promise<void>
+  async setResources(resources: AgentHarnessResources): Promise<void>
+  async navigateTree(options: NavigateTreeOptions): Promise<NavigateTreeResult>
+  async abort(): Promise<AbortResult>
+}
+```
+**Both use the same `runAgentLoop`** internally. `AgentHarness` wraps it with richer state management, resource loading, and session persistence.
+### Session System (`packages/agent/src/harness/session/`)
+New formal session infrastructure (1,008 lines across 7 files):
+```typescript
+// Session storage with JSONL backend
+SessionStorage<TMetadata> {
+  getMetadata(), setLeafId(), createEntryId(), appendEntry(),
+  getEntry(), findEntries(), getLabel(), getPathToRoot(), getEntries()
+}
+// Session repo with fork/list/delete
+SessionRepo<TMetadata, TCreateOptions, TListOptions> {
+  create(), open(), list(), delete(), fork()
+}
+```
+**pi-crew's event log** (`src/state/event-log.ts`) uses its own JSONL format — no conflict.
+---
+## 2. New Hooks (AgentHarness)
+### `context` hook
+Fires before each LLM call to allow context transformation:
+```typescript
+// agent-harness.ts line ~413
+const result = await this.emitHook({ type: "context", messages: [...messages] });
+```
+**pi-crew relevance:** pi-crew currently uses only `before_agent_start`. The `context` hook would allow per-turn context transformation (e.g., pruning messages or injecting external context).
+### `resources_update` hook
+Fires when resources (skills/prompt templates) change mid-run:
+```typescript
+type: "resources_update";
+resources: AgentHarnessResources;
+previousResources: AgentHarnessResources;
+```
+**pi-crew relevance:** Useful for dynamic skill loading during task execution.
+### `model_select` / `thinking_level_select` hooks
+Fire when the model or thinking level changes mid-run.
+**pi-crew relevance:** Supports the `prepareNextTurn` dynamic model switching pattern.
+---
+## 3. New `prepareNextTurn` API
+```typescript
+// packages/agent/src/types.ts
+prepareNextTurn?: (
+  context: PrepareNextTurnContext,
+) => AgentLoopTurnUpdate | undefined | Promise<AgentLoopTurnUpdate | undefined>;
+interface AgentLoopTurnUpdate {
+  context?: AgentContext;      // replacement context
+  model?: Model<any>;          // new model for next turn
+  thinkingLevel?: ThinkingLevel; // new thinking level
+}
+```
+Called after each `turn_end` and before deciding whether to start another LLM call. Enables **dynamic model routing** mid-run without restarting.
+**pi-crew relevance:** pi-crew's process-per-task model means each task is already isolated, so there is no current use for `prepareNextTurn`. However, it could enable a future single-process execution mode.
+---
+## 4. New `shouldStopAfterTurn` API
+```typescript
+shouldStopAfterTurn?: (context: ShouldStopAfterTurnContext) => boolean | Promise<boolean>;
+```
+Called after each turn completes. Return `true` to gracefully stop after the current turn (without starting another LLM call).
+**pi-crew relevance:** Could be used to implement turn-count-based task completion (instead of relying on `maxTurns` in child-pi).
+---
+## 5. New `transformContext` API
+```typescript
+transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>;
+```
+Applied to the context before `convertToLlm` on each turn. Intended for context-window management or external context injection.
+**pi-crew relevance:** Could replace the current approach of rewriting prompts in `before_agent_start` — instead, transform the full context between turns.
+---
+## 6. Result Type System
+```typescript
+// packages/agent/src/harness/types.ts
+export type Result<TValue, TError> =
+  | { ok: true; value: TValue }
+  | { ok: false; error: TError };
+export function ok<TValue, TError>(value: TValue): Result<TValue, TError>
+export function err<TValue, TError>(error: TError): Result<TValue, TError>
+export function getOrThrow<TValue, TError>(result: Result<TValue, TError>): TValue
+```
+Formal result type for all harness filesystem and execution operations. Expected failures are returned as values instead of being thrown as exceptions.
+**pi-crew relevance:** No current use. If pi-crew ever uses `AgentHarness` directly, this would be the expected error-handling pattern.
+---
+## 7. Image Generation API (`@earendil-works/pi-ai`)
+```typescript
+// packages/ai/src/images.ts
+export async function generateImages<TApi extends ImagesApi>(
+  model: ImagesModel<TApi>,
+  context: ImagesGenerationContext,
+  options?: ImagesGenerationOptions
+): Promise<ImageResult[]>
+```
+New image generation capability. Providers: OpenRouter images, Flux, DALL-E, etc.
+**pi-crew relevance:** Tasks can now use image generation. No API change needed — pi handles it.
+---
+## 8. Explicit Session ID Naming
+```typescript
+// packages/coding-agent/src/core/session-manager.ts
+this.sessionId = options?.id ?? createSessionId();
+```
+Users can now specify a custom session ID on startup.
+**pi-crew relevance:** Could enhance the `inheritContext` feature — pass a named session instead of raw JSON.
+---
+## 9. Stream Options Patch System
+```typescript
+// AgentHarnessStreamOptionsPatch — returned by before_provider_request hooks
+export interface AgentHarnessStreamOptionsPatch {
+  transport?: Transport;
+  timeoutMs?: number;
+  maxRetries?: number;
+  headers?: Record<string, string | undefined>; // undefined = delete
+  metadata?: Record<string, unknown | undefined>;
+}
+```
+Hooks can now **modify stream options** before each LLM call (per-turn patching).
+**pi-crew relevance:** Could enable per-task timeout/retries via hooks instead of process-level limits.
+---
+## 10. Bug Fixes Affecting pi-crew
+### Tool Preflight Abort (`b9448276`)
+**Before:** When a run was aborted, sibling tool calls kept preparing in parallel.
+**After:** `signal?.aborted` check breaks the tool execution loop immediately.
+```typescript
+// agent-loop.ts
+if (signal?.aborted) {
+  break; // Stop preparing sibling tool calls
+}
+```
+**pi-crew relevance:** When pi-crew calls `cancel` on a running task, pi now correctly stops tool preflight immediately. Previously, sibling tool calls could continue to be prepared even after cancellation.
+### RPC Child Process Exit (`e007fcd0`)
+RPC now rejects pending requests when the child process exits. This affects communication in `child-pi.ts`.
+---
+## 11. AgentHarness Key Source Files
+| File | Lines | Purpose |
+|------|-------|---------|
+| `harness/agent-harness.ts` | ~950 | Main orchestrator |
+| `harness/types.ts` | ~817 | All types, hooks, error codes |
+| `harness/session/session.ts` | 252 | Session abstraction |
+| `harness/session/jsonl-storage.ts` | 293 | JSONL persistence |
+| `harness/session/session-repo.ts` | 231 | Session CRUD |
+| `harness/skills.ts` | 375 | Skill loading + formatting |
+| `harness/prompt-templates.ts` | 267 | Prompt template processing |
+| `harness/compaction/compaction.ts` | 842 | Transcript compaction |
+| `harness/compaction/branch-summarization.ts` | 355 | Branch summarization |
+| `harness/env/nodejs.ts` | 370+ | Node.js execution environment |
+| `harness/execution-env.ts` | Abstract | FS + shell abstraction |
+---
+## 12. Opportunities for pi-crew Enhancement
+> **Full plans:** [`docs/pi-mono-opportunities.md`](./pi-mono-opportunities.md)
+### High Priority
+**BM25 Semantic Reranking** — Fix `recommendTeam()` keyword failures by integrating existing BM25 search.
+### Medium Priority
+**Extended Hook Phases** — `before_turn`/`after_turn` hooks using existing `turn_end` tracking in `child-pi.ts`.
+**Hook Lifecycle Tests** — Cover untested hooks: `task_result`, `before_retry`, `before_publish`, `session_before_switch`, `run_recovery`.
+### Future (6+ months)
+**AgentHarness Migration** — When `AgentHarness` stabilizes (removes `Agent` dependency), pi-crew could replace `child-pi.ts` spawning with harness-based in-process execution. **Not a current concern.**
+---
+## 13. Summary
+| Check | Result |
+|-------|--------|
+| Breaking API changes | **None** |
+| `Agent` class API | **Unchanged** — pi-crew compatible |
+| `AgentHarness` class | **New** — additive, not used by pi-crew |
+| New hooks | `context`, `resources_update`, `model_select`, `thinking_level_select` |
+| New lifecycle APIs | `prepareNextTurn`, `shouldStopAfterTurn`, `transformContext` |
+| New providers/features | Together AI, Xiaomi MiMo, Image generation, Codex websocket |
+| Bug fixes affecting pi-crew | Tool preflight abort, RPC child exit |
+| Migration path | AgentHarness (6+ months out, not urgent) |
+**Conclusion:** pi-crew is fully compatible with the latest pi source. The `AgentHarness` refactor is substantial but additive — it coexists with the legacy `Agent` class that pi-crew uses. Focus on pi-crew-specific enhancements. Monitor `AgentHarness` stabilization for future migration.

package/docs/skills/REFERENCE.md ADDED Viewed

@@ -0,0 +1,144 @@
+# pi-crew Skills Reference
+## Skill Chains
+### Bug Investigation
+```
+systematic-debugging (4 phases with refuse gate)
+    ↓
+verification-before-done (evidence before claim)
+    ↓
+post-mortem (RCA documentation)
+```
+### Multi-phase Work
+```
+orchestration (phase coordination)
+    ↓
+delegation-patterns (task splitting)
+    ↓
+verification-before-done (after each phase)
+```
+### Code Review (Quick)
+```
+scrutinize (outsider perspective + simpler alternative)
+```
+### Code Review (Deep)
+```
+scrutinize (outsider perspective)
+    ↓
+multi-perspective-review (8-pass deep review)
+    ↓
+secure-agent-orchestration-review (security focus)
+```
+---
+## When to Invoke
+| Situation | Skill |
+|-----------|-------|
+| Bug / test failure / crash | `systematic-debugging` |
+| Before claiming done | `verification-before-done` |
+| Code review (quick) | `scrutinize` |
+| Code review (deep) | `multi-perspective-review` |
+| Task delegation | `delegation-patterns` |
+| Complex multi-phase work | `orchestration` |
+| After bug is fixed | `post-mortem` |
+| Security review | `security-review` |
+| Workspace safety | `workspace-isolation` |
+| Bash safety | `safe-bash` |
+| Hypothesis-driven investigation | `threat-hypothesis-framework` |
+| Active threat hunting | `hunting-investigation-loop` |
+| Artifact examination | `artifact-analysis-loop` |
+| Building response procedures | `incident-playbook-construction` |
+| Designing detection pipelines | `detection-pipeline-design` |
+---
+## Skills Inventory
+### Core Discipline
+| Skill | Description |
+|-------|-------------|
+| `systematic-debugging` | Four-phase debugging with refuse gates, falsify-first discipline |
+| `verification-before-done` | Evidence before claims |
+| `orchestration` | Multi-phase coordination, 8 rules including "respawn not absorb" |
+### Security
+| Skill | Description |
+|-------|-------------|
+| `security-review` | Security review with audit and detection authoring |
+| `threat-hypothesis-framework` | Hypothesis-driven investigation |
+| `hunting-investigation-loop` | Active threat hunting with validation |
+| `artifact-analysis-loop` | Artifact analysis with IOC extraction |
+| `incident-playbook-construction` | Playbook building with steps, decisions, SLAs |
+| `detection-pipeline-design` | Data pipeline design for security monitoring |
+### Documentation
+| Skill | Description |
+|-------|-------------|
+| `post-mortem` | Engineering RCA record |
+### Delegation
+| Skill | Description |
+|-------|-------------|
+| `delegation-patterns` | Task splitting patterns |
+| `requirements-to-task-packet` | Task packet creation |
+### Runtime/Safety
+| Skill | Description |
+|-------|-------------|
+| `workspace-isolation` | Security boundary enforcement |
+| `worktree-isolation` | Git worktree safety |
+| `safe-bash` | Bash command safety |
+| `state-mutation-locking` | State mutation protection |
+### Observability
+| Skill | Description |
+|-------|-------------|
+| `event-log-tracing` | JSONL event log analysis |
+| `runtime-state-reader` | Runtime state inspection |
+| `observability-reliability` | Reliability patterns |
+---
+## Anti-patterns
+| Anti-pattern | Skill | Rule |
+|--------------|-------|------|
+| Proposing fix before reproducing | `systematic-debugging` | Refuse Gate |
+| Running proof before disproof | `systematic-debugging` | Phase 3 |
+| Claiming "tests pass" without fresh run | `verification-before-done` | Gate Function |
+| Reviewing diff-local without tracing path | `scrutinize` | Trace step |
+| Skipping simpler-alternative pass | `multi-perspective-review` | Pre-review |
+| Editing files yourself as orchestrator | `orchestration` | Rule 1 |
+| Dispatching serially when parallel possible | `orchestration` | Rule 3 |
+| Committing a red tree | `orchestration` | Rule 6 |
+| Absorbing subagent's broken work | `orchestration` | Rule 7 |
+| Rubber-stamp review | `multi-perspective-review` | Rules |
+---
+## Key Enforcement Patterns (from 9arm)
+| Pattern | Implemented In |
+|---------|---------------|
+| **Refuse Gate** | `systematic-debugging` |
+| **Recite Ritual** | `systematic-debugging` (Invocation) |
+| **Falsify Before Proof** | `systematic-debugging` (Phase 3) |
+| **Simpler Alternative Pass** | `scrutinize`, `multi-perspective-review` |
+| **Required Inputs Gate** | `post-mortem` |
+| **Respawn Not Absorb** | `orchestration` (Rule 7) |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-crew",
-  "version": "0.5.2",
+  "version": "0.5.5",
   "description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
   "author": "baphuongna",
   "license": "MIT",
@@ -48,7 +48,7 @@
     "check:lazy-imports": "node scripts/check-lazy-imports.mjs",
     "typecheck": "tsc --noEmit && node --experimental-strip-types -e \"await import('./index.ts'); console.log('strip-types import ok')\"",
     "test": "npm run test:unit && npm run test:integration",
-    "test:unit": "node --experimental-strip-types --test --test-concurrency=4 --test-timeout=30000 --test-force-exit test/unit/*.test.ts",
+    "test:unit": "node --experimental-strip-types --test --test-concurrency=4 --test-timeout=180000 --test-force-exit test/unit/*.test.ts",
     "test:watch": "node --experimental-strip-types --watch --test --test-concurrency=4 --test-timeout=30000 --test-force-exit test/unit/*.test.ts",
     "test:integration": "node --experimental-strip-types --test --test-concurrency=1 --test-timeout=120000 test/integration/*.test.ts",
     "build:bundle": "node scripts/build-bundle.mjs",
@@ -80,6 +80,7 @@
   },
   "dependencies": {
     "@sinclair/typebox": "^0.34.49",
+    "ajv": "^8.20.0",
     "cli-highlight": "^2.1.11",
     "diff": "^5.2.0",
     "jiti": "^2.6.1",
@@ -87,10 +88,10 @@
   },
   "devDependencies": {
     "@biomejs/biome": "^2.4.15",
-    "@earendil-works/pi-agent-core": "^0.75.5",
-    "@earendil-works/pi-ai": "^0.75.5",
-    "@earendil-works/pi-coding-agent": "^0.75.5",
-    "@earendil-works/pi-tui": "^0.75.5",
+    "@earendil-works/pi-agent-core": "^0.77.0",
+    "@earendil-works/pi-ai": "^0.77.0",
+    "@earendil-works/pi-coding-agent": "^0.77.0",
+    "@earendil-works/pi-tui": "^0.77.0",
     "esbuild": "^0.28.0",
     "typescript": "^5.9.3"
   },