npm - @posthog/agent - Versions diffs - 2.0.0 → 2.0.2 - Mend

@posthog/agent 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

package/LICENSE +1 -1
package/README.md +221 -219
package/dist/adapters/claude/conversion/tool-use-to-acp.d.ts +21 -0
package/dist/adapters/claude/conversion/tool-use-to-acp.js +547 -0
package/dist/adapters/claude/conversion/tool-use-to-acp.js.map +1 -0
package/dist/adapters/claude/permissions/permission-options.d.ts +13 -0
package/dist/adapters/claude/permissions/permission-options.js +117 -0
package/dist/adapters/claude/permissions/permission-options.js.map +1 -0
package/dist/adapters/claude/questions/utils.d.ts +132 -0
package/dist/adapters/claude/questions/utils.js +63 -0
package/dist/adapters/claude/questions/utils.js.map +1 -0
package/dist/adapters/claude/tools.d.ts +18 -0
package/dist/adapters/claude/tools.js +95 -0
package/dist/adapters/claude/tools.js.map +1 -0
package/dist/agent-DBQY1BfC.d.ts +123 -0
package/dist/agent.d.ts +5 -0
package/dist/agent.js +3656 -0
package/dist/agent.js.map +1 -0
package/dist/claude-cli/cli.js +3695 -2746
package/dist/claude-cli/vendor/ripgrep/COPYING +3 -0
package/dist/claude-cli/vendor/ripgrep/arm64-darwin/rg +0 -0
package/dist/claude-cli/vendor/ripgrep/arm64-darwin/ripgrep.node +0 -0
package/dist/claude-cli/vendor/ripgrep/arm64-linux/rg +0 -0
package/dist/claude-cli/vendor/ripgrep/arm64-linux/ripgrep.node +0 -0
package/dist/claude-cli/vendor/ripgrep/x64-darwin/rg +0 -0
package/dist/claude-cli/vendor/ripgrep/x64-darwin/ripgrep.node +0 -0
package/dist/claude-cli/vendor/ripgrep/x64-linux/rg +0 -0
package/dist/claude-cli/vendor/ripgrep/x64-linux/ripgrep.node +0 -0
package/dist/claude-cli/vendor/ripgrep/x64-win32/rg.exe +0 -0
package/dist/claude-cli/vendor/ripgrep/x64-win32/ripgrep.node +0 -0
package/dist/gateway-models.d.ts +24 -0
package/dist/gateway-models.js +93 -0
package/dist/gateway-models.js.map +1 -0
package/dist/index.d.ts +170 -1157
package/dist/index.js +9373 -5135
package/dist/index.js.map +1 -1
package/dist/logger-DDBiMOOD.d.ts +24 -0
package/dist/posthog-api.d.ts +40 -0
package/dist/posthog-api.js +175 -0
package/dist/posthog-api.js.map +1 -0
package/dist/server/agent-server.d.ts +41 -0
package/dist/server/agent-server.js +10503 -0
package/dist/server/agent-server.js.map +1 -0
package/dist/server/bin.d.ts +1 -0
package/dist/server/bin.js +10558 -0
package/dist/server/bin.js.map +1 -0
package/dist/types.d.ts +129 -0
package/dist/types.js +1 -0
package/dist/types.js.map +1 -0
package/package.json +65 -13
package/src/acp-extensions.ts +98 -16
package/src/adapters/acp-connection.ts +494 -0
package/src/adapters/base-acp-agent.ts +150 -0
package/src/adapters/claude/claude-agent.ts +596 -0
package/src/adapters/claude/conversion/acp-to-sdk.ts +102 -0
package/src/adapters/claude/conversion/sdk-to-acp.ts +571 -0
package/src/adapters/claude/conversion/tool-use-to-acp.ts +618 -0
package/src/adapters/claude/hooks.ts +64 -0
package/src/adapters/claude/mcp/tool-metadata.ts +102 -0
package/src/adapters/claude/permissions/permission-handlers.ts +433 -0
package/src/adapters/claude/permissions/permission-options.ts +103 -0
package/src/adapters/claude/plan/utils.ts +56 -0
package/src/adapters/claude/questions/utils.ts +92 -0
package/src/adapters/claude/session/commands.ts +38 -0
package/src/adapters/claude/session/mcp-config.ts +37 -0
package/src/adapters/claude/session/models.ts +12 -0
package/src/adapters/claude/session/options.ts +236 -0
package/src/adapters/claude/tool-meta.ts +143 -0
package/src/adapters/claude/tools.ts +53 -688
package/src/adapters/claude/types.ts +61 -0
package/src/adapters/codex/spawn.ts +130 -0
package/src/agent.ts +96 -587
package/src/execution-mode.ts +43 -0
package/src/gateway-models.ts +135 -0
package/src/index.ts +79 -0
package/src/otel-log-writer.test.ts +105 -0
package/src/otel-log-writer.ts +94 -0
package/src/posthog-api.ts +75 -235
package/src/resume.ts +115 -0
package/src/sagas/apply-snapshot-saga.test.ts +690 -0
package/src/sagas/apply-snapshot-saga.ts +88 -0
package/src/sagas/capture-tree-saga.test.ts +892 -0
package/src/sagas/capture-tree-saga.ts +141 -0
package/src/sagas/resume-saga.test.ts +558 -0
package/src/sagas/resume-saga.ts +332 -0
package/src/sagas/test-fixtures.ts +250 -0
package/src/server/agent-server.test.ts +220 -0
package/src/server/agent-server.ts +748 -0
package/src/server/bin.ts +88 -0
package/src/server/jwt.ts +65 -0
package/src/server/schemas.ts +47 -0
package/src/server/types.ts +13 -0
package/src/server/utils/retry.test.ts +122 -0
package/src/server/utils/retry.ts +61 -0
package/src/server/utils/sse-parser.test.ts +93 -0
package/src/server/utils/sse-parser.ts +46 -0
package/src/session-log-writer.test.ts +140 -0
package/src/session-log-writer.ts +137 -0
package/src/test/assertions.ts +114 -0
package/src/test/controllers/sse-controller.ts +107 -0
package/src/test/fixtures/api.ts +111 -0
package/src/test/fixtures/config.ts +33 -0
package/src/test/fixtures/notifications.ts +92 -0
package/src/test/mocks/claude-sdk.ts +251 -0
package/src/test/mocks/msw-handlers.ts +48 -0
package/src/test/setup.ts +114 -0
package/src/test/wait.ts +41 -0
package/src/tree-tracker.ts +173 -0
package/src/types.ts +54 -137
package/src/utils/acp-content.ts +58 -0
package/src/utils/async-mutex.test.ts +104 -0
package/src/utils/async-mutex.ts +31 -0
package/src/utils/common.ts +15 -0
package/src/utils/gateway.ts +9 -6
package/src/utils/logger.ts +0 -30
package/src/utils/streams.ts +220 -0
package/CLAUDE.md +0 -331
package/src/adapters/claude/claude.ts +0 -1947
package/src/adapters/claude/mcp-server.ts +0 -810
package/src/adapters/claude/utils.ts +0 -267
package/src/adapters/connection.ts +0 -95
package/src/file-manager.ts +0 -273
package/src/git-manager.ts +0 -577
package/src/schemas.ts +0 -241
package/src/session-store.ts +0 -259
package/src/task-manager.ts +0 -163
package/src/todo-manager.ts +0 -180
package/src/tools/registry.ts +0 -134
package/src/tools/types.ts +0 -133
package/src/utils/tapped-stream.ts +0 -60
package/src/worktree-manager.ts +0 -974

package/LICENSE CHANGED Viewed

@@ -3,7 +3,7 @@ Business Source License 1.1
 Parameters
 Licensor: PostHog Inc.
-Licensed Work: Array & Posthog Agent package
+Licensed Work: Twig & Posthog Agent package
 The Licensed Work is © 2025 PostHog Inc.
 Change Date: None (the Licensed Work will remain under this License indefinitely unless Licensor specifies otherwise)

package/README.md CHANGED Viewed

@@ -1,265 +1,267 @@
-# PostHog Agent SDK
+# @posthog/agent
+The core runtime for PostHog cloud runs. Provides two things: an **Agent SDK** for running AI agents against PostHog tasks, and an **AgentServer** CLI that hosts the agent inside cloud sandboxes. Both are built on the [Agent Client Protocol (ACP)](https://github.com/anthropics/agent-client-protocol) for standardized agent ↔ client communication.
+## Architecture
+```text
+┌──────────────────────────────────────────────────────────────────┐
+│  Client (Twig IDE or local CLI)                                  │
+│    connects via SSE/JSON-RPC (cloud) or in-process streams (local)│
+└────────────────────┬─────────────────────────────────────────────┘
+                     │
+          ┌──────────▼──────────┐
+          │    AgentServer      │  (cloud only — Hono HTTP server)
+          │  GET /events (SSE)  │
+          │  POST /command      │
+          └──────────┬──────────┘
+                     │
+          ┌──────────▼──────────┐
+          │   ACP Connection    │  createAcpConnection()
+          │  (ndJson streams)   │
+          │                     │
+          │  ┌── tap ──┐        │  both directions intercepted for:
+          │  │ logging │        │  • SessionLogWriter (OTEL / S3)
+          │  │ SSE     │        │  • SSE broadcast to clients
+          │  └─────────┘        │
+          └──────────┬──────────┘
+                     │
+        ┌────────────┼────────────┐
+        ▼                         ▼
+  ┌─────────────┐         ┌─────────────┐
+  │ Claude      │         │ Codex       │
+  │ Adapter     │         │ Adapter     │
+  │             │         │             │
+  │ ClaudeAcp-  │         │ spawnCodex- │
+  │ Agent       │         │ Process()   │
+  │ (in-process)│         │ (subprocess)│
+  └──────┬──────┘         └──────┬──────┘
+         │                       │
+         ▼                       ▼
+  Claude Agent SDK        codex-acp binary
+  query()                 stdin/stdout
+```
-TypeScript agent framework that wraps the Claude Agent SDK for PostHog's Array desktop app. Features a Git-based task execution system that stores task artifacts alongside your code.
+## Design decisions
-## Quick Start
+### Why ACP?
-```bash
-pnpm install
-pnpm run example
-```
+ACP is a standard protocol for agent ↔ client communication over ndJson streams. Using it gives us two things:
-## Key Features
+1. **Any ACP-compatible client can connect** — the protocol is the contract, not our code.
+2. **Clean separation** — the agent adapter knows nothing about HTTP, and the server knows nothing about Claude/Codex. They communicate through typed streams.
-- **Git-Based Task Execution**: Plans and artifacts stored in `.posthog/` folders and committed to Git
-- **PostHog Integration**: Fetches existing tasks from PostHog API
-- **3-Phase Execution**: Research → Plan → Build with automatic progression
-- **Branch Management**: Automatic branch creation for planning and implementation
-- **Progress Tracking**: Execution status stored in PostHog `TaskRun` records for easy polling
+### Cloud vs local
-## Usage
+The same ACP agent runs in both contexts. The difference is how it's connected:
-```typescript
-import { Agent, PermissionMode } from '@posthog/agent';
-import type { AgentEvent } from '@posthog/agent';
+**Cloud (AgentServer):** The agent runs inside a sandbox. `AgentServer` is an HTTP server (Hono) that wraps the ACP connection. Clients connect via `GET /events` (SSE) and `POST /command` (JSON-RPC). Authentication uses JWT tokens (RS256) — the sandbox holds a public key, PostHog Django holds the private key. In background mode, the server auto-starts, prompts the agent with the task description, and signals completion via the PostHog API. In interactive mode, it stays open for conversation.
-const agent = new Agent({
-    workingDirectory: "/path/to/repo",
-    posthogApiUrl: "https://app.posthog.com",
-    posthogApiKey: process.env.POSTHOG_API_KEY, // Used for both API and MCP
-    posthogProjectId: 1,
-    onEvent: (event) => {
-      // Streamed updates for responsive UIs
-      if (event.type !== 'token') {
-        handleLiveEvent(event);
-      }
-    },
-});
-// Run a task
-const taskId = "task_abc123";
-const task = await agent.getPostHogClient()?.fetchTask(taskId);
-await agent.runTask(task, {
-  repositoryPath: "/path/to/repo",
-  permissionMode: PermissionMode.ACCEPT_EDITS,
-  isCloudMode: false,
-  createPR: true, // Optional: create PR after build. This setting has no effect if running in cloud mode.
-  autoProgress: true,
-});
-```
+**Local (Twig desktop):** The agent runs in-process. Twig calls `createAcpConnection()` directly — no HTTP server, no JWT. The bidirectional ACP streams connect client ↔ agent within the same process.
-For local MCP development:
+**TreeTracker** handles the bridge between these contexts: it captures the git working tree as snapshots (tree hash + file archive) so work can be transferred between cloud and local. This enables the "hand off" flow — start locally, continue in cloud, or vice versa. Tree snapshots use the Saga pattern (`src/sagas/`) for atomic operations with automatic rollback on failure.
-```typescript
-const agent = new Agent({
-  workingDirectory: "/path/to/repo",
-  posthogMcpUrl: 'http://localhost:8787/mcp',
-});
-```
+### Permission modes
-## Task Execution
+Four modes are defined in `src/execution-mode.ts`:
-Each task execution creates Git branches and follows a 3-phase approach:
+| Mode               | ID                  | Behavior                                                        |
+| ------------------- | ------------------- | --------------------------------------------------------------- |
+| Always ask          | `default`           | Prompts for permission on first use of each tool                |
+| Accept edits        | `acceptEdits`       | Auto-approves file write tools for the session                  |
+| Plan mode           | `plan`              | Read-only — the agent can analyze but not modify files          |
+| Bypass permissions  | `bypassPermissions` | Auto-approves everything (hidden when running as root)          |
-1. **Research Phase**: Analyzes the codebase and may generate clarifying questions
-2. **Planning Phase**: Creates an implementation plan in `.posthog/{id}/plan.md` on branch `posthog/task-{id}-planning`
-3. **Build Phase**: Implements code changes on branch `posthog/task-{id}-implementation`
+In cloud background mode, permissions are always auto-approved. In interactive mode, the permission system is active and configurable per session — except under the cloud `AgentServer`, which currently auto-approves in interactive mode as well (see "Auto-approval in cloud mode" below). Tool categorization lives in `src/adapters/claude/tools.ts` — each tool belongs to a group (read, write, bash, search, web, agent) and modes whitelist groups.
-## File System
+## ACP connection layer
-```
-your-repo/
-├── .posthog/
-│   ├── README.md
-│   ├── .gitignore
-│   └── {task-id}/
-│       ├── plan.md
-│       ├── questions.json (if research phase generated questions)
-│       └── context.md (optional)
-└── (your code)
-```
+`createAcpConnection()` in `src/adapters/acp-connection.ts` is the heart of the package. It's a factory that returns a `{ clientStreams, cleanup }` object — a pair of ndJson `ReadableStream`/`WritableStream` that the caller uses to speak ACP.
-## Progress Updates
+Internally it does three things:
-Progress for each task execution is persisted to PostHog's `TaskRun` model, so UIs can poll for updates without relying on streaming hooks:
+1. **Creates bidirectional streams** — two pairs of `(readable, writable)` using `createBidirectionalStreams()`. One pair for the agent side, one for the client side, cross-wired so writes on one appear as reads on the other.
-```typescript
-const agent = new Agent({
-  workingDirectory: repoPath,
-  posthogApiUrl: "https://app.posthog.com",
-  posthogApiKey: process.env.POSTHOG_KEY,
-  posthogProjectId: 1,
-});
-const poller = setInterval(async () => {
-  const client = agent.getPostHogClient();
-  const runs = await client?.listTaskRuns(taskId);
-  const latestRun = runs?.sort((a, b) =>
-    new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
-  )[0];
-  if (latestRun) {
-    // Fetch logs from S3 using presigned URL
-    const logs = await client?.fetchTaskRunLogs(latestRun);
-    renderProgress(latestRun.status, logs || []);
-  }
-}, 3000);
-try {
-  await agent.runTask(task, { repositoryPath: repoPath });
-} finally {
-  clearInterval(poller);
-}
-// Live stream still available through the onEvent hook
-function handleLiveEvent(event: AgentEvent) {
-  switch (event.type) {
-    case 'status':
-      // optimistic UI update
-      break;
-    case 'error':
-      notifyError(event.message);
-      break;
-    default:
-      break;
-  }
-}
-```
+2. **Taps both directions for logging** — if a `logWriter` and `taskRunId` are provided, both the agent→client and client→agent writables are wrapped with `createTappedWritableStream`. Every ndJson line that flows through is appended to the `SessionLogWriter` buffer. This is transparent to both ends.
-> Prefer streaming updates? Pass an `onEvent` handler when constructing the agent to keep receiving real-time events while progress is also written to PostHog.
+3. **Connects the adapter** — for Claude, it instantiates `ClaudeAcpAgent` and wires it to the agent-side streams via `AgentSideConnection`. For Codex, it spawns a subprocess and pipes the client-side streams to the process's stdin/stdout.
-## Requirements
+The Claude and Codex paths differ significantly:
-- PNPM
-- Git repository
-- PostHog API access
-- Claude API access via `@anthropic-ai/claude-agent-sdk`
+**Claude (in-process):** The `AgentSideConnection` calls methods on `ClaudeAcpAgent` directly. The agent implements the full ACP `Agent` interface: `initialize`, `newSession`, `prompt`, `cancel`, etc. Under the hood, `prompt()` creates a Claude Agent SDK `Query` and processes messages in a loop, converting between ACP and SDK formats using the `src/adapters/claude/conversion/` module.
-## Configuration Options
+**Codex (subprocess):** There's no `AgentSideConnection` — the `codex-acp` binary speaks ACP natively on stdin/stdout. The connection layer adds `TransformStream` filters on both directions to: suppress noisy `session/update` messages during session loading, inject `_posthog/sdk_session` notifications, filter model lists to allowed IDs, and sync reasoning effort config before prompts.
-You can customize behavior using `TaskExecutionOptions`:
+## AgentServer
-```ts
-await agent.runTask(task, {
-  repositoryPath: "/path/to/repo",
-  permissionMode: PermissionMode.ACCEPT_EDITS, // or PLAN, DEFAULT, BYPASS
-  isCloudMode: false, // local execution with pauses between phases
-  autoProgress: true, // automatically progress through phases
-  queryOverrides: {
-    model: 'claude-sonnet-4-5-20250929',
-    temperature: 0.7
-  }
-});
+`AgentServer` (`src/server/agent-server.ts`) wraps an ACP connection in an HTTP server for cloud sandbox execution. It manages a single `ActiveSession` at a time.
+### Session initialization flow
+```text
+start()
+  │
+  ├─ Hono HTTP server starts on configured port
+  │
+  └─ autoInitializeSession()
+       │
+       ├─ Creates synthetic JwtPayload from CLI config
+       ├─ configureEnvironment() — sets ANTHROPIC_BASE_URL, OPENAI_BASE_URL, etc.
+       │    pointing at the PostHog LLM gateway
+       ├─ Creates TreeTracker, SessionLogWriter, PostHogAPIClient
+       ├─ createAcpConnection() — sets up ACP streams with log tapping
+       │
+       ├─ Wraps client streams with a SECOND tap layer (NdJsonTap)
+       │    that broadcasts every ACP message to SSE clients
+       │
+       ├─ ClientSideConnection.initialize() — ACP handshake
+       ├─ ClientSideConnection.newSession() — starts agent session
+       │
+       └─ sendInitialTaskMessage()
+            ├─ Fetches task from PostHog API
+            ├─ Sends task.description as first prompt
+            └─ Background mode: signals completion/failure via API
+               Interactive mode: stays open
 ```
-## Fine-Grained Permissions
+The two tapping layers are distinct. The inner tap (from `createAcpConnection`) persists to logs. The outer tap (in `AgentServer`) broadcasts to SSE. This means log persistence works for both cloud and local, while SSE broadcast is cloud-only.
-For advanced control over agent actions, you can provide a `canUseTool` callback that intercepts every tool use during the **build phase** (for task execution) or **direct run calls**. This allows you to implement custom approval flows, logging, or restrictions.
+### HTTP endpoints
-See the [Claude Agent SDK Permissions docs](https://docs.claude.com/en/api/agent-sdk/permissions) for more details.
+| Method | Path       | Auth | Description                                              |
+| ------ | ---------- | ---- | -------------------------------------------------------- |
+| `GET`  | `/health`  | None | Returns `{ status: "ok", hasSession }`                   |
+| `GET`  | `/events`  | JWT  | SSE stream — all ACP notifications broadcast in real time |
+| `POST` | `/command` | JWT  | JSON-RPC commands: `user_message`, `cancel`, `close`     |
-### Per-Agent Configuration
+JWT validation (`src/server/jwt.ts`) uses RS256 with a configurable public key. The JWT payload carries `task_id`, `run_id`, `team_id`, `user_id`, `distinct_id`, and `mode`. The audience must be `posthog:sandbox_connection`.
-Apply the same permission hook to all task executions and direct runs:
+### Commands flow through ACP
-```typescript
-import { Agent } from '@posthog/agent';
-import type { PermissionResult } from '@posthog/agent';
+When `POST /command` receives a `user_message`, it doesn't handle it directly — it calls `clientConnection.prompt()` on the ACP `ClientSideConnection`, which sends a `session/prompt` message through the ACP streams to the agent. Similarly, `cancel` sends `session/cancel`. This means all commands follow the same path as in-process calls from Twig, with the HTTP layer just being a thin translation.
-const agent = new Agent({
-  workingDirectory: "/path/to/repo",
-  posthogApiUrl: "https://app.posthog.com",
-  posthogApiKey: process.env.POSTHOG_API_KEY,
-  posthogProjectId: 1,
-  canUseTool: async (toolName, input, { signal, suggestions }) => {
-    // Block destructive commands
-    if (toolName === 'Bash' && input.command?.includes('rm -rf')) {
-      return {
-        behavior: 'deny',
-        message: 'Destructive rm -rf commands are not allowed',
-        interrupt: true
-      };
-    }
-    // Allow everything else
-    return {
-      behavior: 'allow',
-      updatedInput: input
-    };
-  }
-});
+### Auto-approval in cloud mode
+The `AgentServer` provides a `requestPermission` callback to the `ClientSideConnection` that always selects the "allow" option. In background mode this is necessary (no human to ask). In interactive mode it currently does the same, with a TODO for future per-tool approval via SSE round-trips.
+### Tree state capture
+After every `Write` or `Edit` tool call, the server captures a git tree snapshot via `TreeTracker` and broadcasts it as a `_posthog/tree_snapshot` SSE event. A final snapshot is captured during session cleanup. This is how the client knows what files changed and can restore state for cloud↔local handoff.
+### CLI
+```bash
+npx agent-server \
+  --port 3001 \
+  --mode interactive \
+  --repositoryPath /path/to/repo \
+  --taskId task_123 \
+  --runId run_456
 ```
-### Per-Task Configuration
+Required environment variables (validated by zod in `src/server/bin.ts`):
+- `JWT_PUBLIC_KEY` — RS256 public key for sandbox auth
+- `POSTHOG_API_URL` — PostHog API base URL
+- `POSTHOG_PERSONAL_API_KEY` — API key for PostHog requests
+- `POSTHOG_PROJECT_ID` — numeric project ID
-Override permissions for specific tasks (only applied during build phase):
+## Agent SDK
+The `Agent` class (`src/agent.ts`) is the entrypoint for local/programmatic usage. It handles LLM gateway configuration, log writer setup, and model filtering — then delegates to `createAcpConnection()`.
 ```typescript
-await agent.runTask(task, {
+import { Agent } from "@posthog/agent/agent"
+const agent = new Agent({
+  posthog: {
+    apiUrl: "https://app.posthog.com",
+    getApiKey: () => process.env.POSTHOG_PERSONAL_API_KEY!,
+    projectId: 12345,
+  },
+})
+// Run a task — returns an ACP connection with bidirectional streams
+const connection = await agent.run(taskId, runId, {
   repositoryPath: "/path/to/repo",
-  permissionMode: PermissionMode.DEFAULT,
-  canUseTool: async (toolName, input, { signal, suggestions }) => {
-    // Custom approval UI
-    const approved = await showApprovalDialog({
-      tool: toolName,
-      input: input,
-      suggestions: suggestions // Permission updates for "always allow"
-    });
-    if (approved.action === 'allow') {
-      return {
-        behavior: 'allow',
-        updatedInput: approved.modifiedInput || input,
-        updatedPermissions: approved.rememberChoice ? suggestions : undefined
-      };
-    }
-    return {
-      behavior: 'deny',
-      message: approved.reason || 'User denied permission',
-      interrupt: !approved.continueWithGuidance
-    };
-  }
-});
+  adapter: "claude", // or "codex"
+})
+// Attach a PR to the task run output
+await agent.attachPullRequestToTask(taskId, prUrl)
+// Cleanup: flush logs and release resources
+await agent.cleanup()
 ```
-### Direct Run Example
+Key difference from `AgentServer`: the SDK returns raw ACP streams for the caller to manage. There's no HTTP layer, no SSE broadcasting, and no auto-prompting. The caller is responsible for creating a `ClientSideConnection`, running the ACP handshake, and sending prompts. This is what Twig does when running agents locally.
-For one-off queries with custom permissions:
+For Codex adapters, `agent.run()` also fetches available models from the PostHog gateway and filters to OpenAI-compatible models, passing the allowed set to the ACP connection for model list filtering.
-```typescript
-const result = await agent.run("Fix the authentication bug", {
-  repositoryPath: "/path/to/repo",
-  permissionMode: PermissionMode.DEFAULT,
-  canUseTool: async (toolName, input, { signal }) => {
-    console.log(`Agent wants to use ${toolName}:`, input);
-    // Simple approval logic
-    if (toolName === 'Write' || toolName === 'Edit') {
-      const allowedFiles = ['src/', 'tests/'];
-      const filePath = input.file_path || input.path;
-      const isAllowed = allowedFiles.some(prefix => filePath?.startsWith(prefix));
-      if (!isAllowed) {
-        return {
-          behavior: 'deny',
-          message: `Can only modify files in: ${allowedFiles.join(', ')}`
-        };
-      }
-    }
-    return { behavior: 'allow', updatedInput: input };
-  }
-});
+## Log pipeline and session resume
+Logs serve two purposes: real-time observability and session resume. Every ACP message that flows through the tapped streams is persisted, creating a complete record of the conversation — user messages, agent responses, tool calls, tool results, tree snapshots, and metadata events. This record is the single source of truth for resuming a session from any point.
+### Writing logs
+`SessionLogWriter` (`src/session-log-writer.ts`) is a per-session multiplexer that buffers raw ndJson lines. On flush (auto-scheduled 500ms after writes, or explicit), it dispatches to every backend that is configured:
+- **OTEL** (`src/otel-log-writer.ts`) — preferred path. Creates an OpenTelemetry `LoggerProvider` per session with resource attributes (`task_id`, `run_id`, `device_type`) set once and indexed via `resource_fingerprint`. Each ndJson line is emitted as an OTEL log record with an `event_type` attribute (the ACP method name) and exported via OTLP HTTP to PostHog's `/i/v1/agent-logs` endpoint. Batch flush interval defaults to 500ms.
+- **Legacy S3** — falls back to `PostHogAPIClient.appendTaskRunLog()`, which POSTs batched `StoredNotification` entries to the Django API. The API stores them as the task run's `log_url`.
+Both backends can be active simultaneously — OTEL for fast indexed queries, S3 for full log download.
+### Resuming from logs
+When a session needs to continue (e.g. cloud↔local handoff, or recovering from a crash), `resumeFromLog()` in `src/resume.ts` reconstructs the agent's state from the persisted log. This is implemented as a `ResumeSaga` (`src/sagas/resume-saga.ts`) with the following steps:
+```text
+1. fetch_task_run   → GET /api/.../runs/{runId}/ to find the log_url
+2. fetch_logs       → Download all StoredNotification entries
+3. find_snapshot    → Scan backwards for latest _posthog/tree_snapshot
+4. apply_snapshot   → Download archive from snapshot URL, restore working tree
+5. rebuild_conversation → Walk log entries to reconstruct conversation turns
+6. find_device      → Scan backwards for last device info (local vs cloud)
 ```
-### Available Tool Names
+The conversation rebuild (`rebuildConversation`) walks the log entries and reassembles turns from ACP `session/update` notifications:
+- `user_message` / `user_message_chunk` → start a new user turn
+- `agent_message_chunk` → accumulate into the current assistant turn (merging consecutive text blocks)
+- `tool_call` / `tool_call_update` → track tool calls with their inputs
+- `tool_result` → match results back to tool calls by `toolCallId`
+The result is a `ResumeState` containing the conversation history as `ConversationTurn[]`, the latest tree snapshot, and metadata. This feeds into the ACP `session/load` or `_posthog/session/resume` methods on the Claude adapter, which initializes a new Claude SDK query with the rebuilt context.
+Snapshot application can fail without aborting the resume — if the archive URL is missing or the download fails, the saga logs a warning and continues with just the conversation history. The `snapshotApplied` flag in the result tells the caller whether files were actually restored.
+## ACP extensions
+ACP defines standard methods like `session/prompt`, `session/update`, and `session/cancel`. PostHog extends the protocol with custom notifications in the `_posthog/` namespace (`src/acp-extensions.ts`). These serve four purposes:
+**Session lifecycle** — events that track the run from start to finish. Clients use these to update UI state (show progress, enable/disable controls, display completion). The Django API uses `task_complete` to mark the run as finished.
+- `_posthog/run_started` — `{ sessionId, runId, taskId? }` — session initialized and ready
+- `_posthog/task_complete` — `{ sessionId, taskId }` — agent finished (success or end-turn)
+- `_posthog/error` — `{ sessionId, message, error? }` — unrecoverable error
+- `_posthog/status` — `{ sessionId, status, message? }` — progress updates
+- `_posthog/sdk_session` — `{ taskRunId, sessionId, adapter }` — maps the ACP session to a task run and adapter type (emitted once per session, used by clients to know which adapter is active)
+**State synchronization** — events that keep the client's view of the agent's state in sync. These are essential for the cloud↔local handoff flow and for the client to render accurate UI.
+- `_posthog/branch_created` — `{ branch }` — agent created a git branch (client can update branch display)
+- `_posthog/tree_snapshot` — `{ treeHash, baseCommit, changes, ... }` — git working tree captured as a snapshot. Contains the tree hash, base commit, file change list, and optionally an archive URL. This is the key event for session resume — the resume saga scans backwards for the latest snapshot to restore files
+- `_posthog/mode_change` — `{ mode, previous_mode }` — permission mode changed (client updates mode selector)
+- `_posthog/compact_boundary` — `{ sessionId, timestamp }` — marks where context compaction occurred, so the client knows the conversation was summarized at this point
+- `_posthog/task_notification` — `{ sessionId, type, message?, data? }` — generic extensible notification for adapter-specific events
+**Client→agent commands** — notifications that flow from client to agent (via `POST /command` in cloud, or direct ACP in local). These are the "verbs" the client can send outside of `session/prompt`.
+- `_posthog/user_message` — `{ content }` — user typed a message (translated to `session/prompt`)
+- `_posthog/cancel` — cancel the current operation (translated to `session/cancel`)
+- `_posthog/close` — close the session and clean up
+- `_posthog/session/resume` — `{ sessionId, fromSnapshot? }` — request to resume a previous session (triggers the resume flow on the Claude adapter)
-The `canUseTool` callback receives one of these tool names:
-- **Read-only**: `Read`, `Glob`, `Grep`, `WebFetch`, `WebSearch`, `ListMcpResources`, `ReadMcpResource`
-- **Modifications**: `Write`, `Edit`, `NotebookEdit`
-- **Execution**: `Bash`, `BashOutput`, `KillBash`, `Task`
-- **Planning**: `ExitPlanMode`, `TodoWrite`
+**Debug** — operational visibility without polluting the ACP conversation.
-**Note**: Research and planning phases have fixed, read-only tool sets. The `canUseTool` hook only applies to the build phase and direct run calls.
+- `_posthog/console` — `{ sessionId, level, message }` — structured debug/info/warn/error log from the agent internals

package/dist/adapters/claude/conversion/tool-use-to-acp.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { PlanEntry, ToolCall, ToolCallUpdate } from '@agentclientprotocol/sdk';
+import { ToolUseBlock, ToolResultBlockParam, WebSearchToolResultBlockParam } from '@anthropic-ai/sdk/resources';
+import { BetaWebSearchToolResultBlockParam, BetaWebFetchToolResultBlockParam, BetaCodeExecutionToolResultBlockParam, BetaBashCodeExecutionToolResultBlockParam, BetaTextEditorCodeExecutionToolResultBlockParam, BetaRequestMCPToolResultBlockParam, BetaToolSearchToolResultBlockParam } from '@anthropic-ai/sdk/resources/beta.mjs';
+import { L as Logger } from '../../../logger-DDBiMOOD.js';
+import '../../../types.js';
+type ToolInfo = Pick<ToolCall, "title" | "kind" | "content" | "locations">;
+declare function toolInfoFromToolUse(toolUse: Pick<ToolUseBlock, "name" | "input">, cachedFileContent: {
+    [key: string]: string;
+}, logger?: Logger): ToolInfo;
+declare function toolUpdateFromToolResult(toolResult: ToolResultBlockParam | BetaWebSearchToolResultBlockParam | BetaWebFetchToolResultBlockParam | WebSearchToolResultBlockParam | BetaCodeExecutionToolResultBlockParam | BetaBashCodeExecutionToolResultBlockParam | BetaTextEditorCodeExecutionToolResultBlockParam | BetaRequestMCPToolResultBlockParam | BetaToolSearchToolResultBlockParam, toolUse: Pick<ToolUseBlock, "name" | "input"> | undefined): Pick<ToolCallUpdate, "title" | "content" | "locations">;
+type ClaudePlanEntry = {
+    content: string;
+    status: "pending" | "in_progress" | "completed";
+    activeForm: string;
+};
+declare function planEntries(input: {
+    todos: ClaudePlanEntry[];
+}): PlanEntry[];
+export { type ClaudePlanEntry, planEntries, toolInfoFromToolUse, toolUpdateFromToolResult };