@posthog/agent 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +221 -219
  3. package/dist/adapters/claude/conversion/tool-use-to-acp.d.ts +21 -0
  4. package/dist/adapters/claude/conversion/tool-use-to-acp.js +547 -0
  5. package/dist/adapters/claude/conversion/tool-use-to-acp.js.map +1 -0
  6. package/dist/adapters/claude/permissions/permission-options.d.ts +13 -0
  7. package/dist/adapters/claude/permissions/permission-options.js +117 -0
  8. package/dist/adapters/claude/permissions/permission-options.js.map +1 -0
  9. package/dist/adapters/claude/questions/utils.d.ts +132 -0
  10. package/dist/adapters/claude/questions/utils.js +63 -0
  11. package/dist/adapters/claude/questions/utils.js.map +1 -0
  12. package/dist/adapters/claude/tools.d.ts +18 -0
  13. package/dist/adapters/claude/tools.js +95 -0
  14. package/dist/adapters/claude/tools.js.map +1 -0
  15. package/dist/agent-DBQY1BfC.d.ts +123 -0
  16. package/dist/agent.d.ts +5 -0
  17. package/dist/agent.js +3656 -0
  18. package/dist/agent.js.map +1 -0
  19. package/dist/claude-cli/cli.js +3695 -2746
  20. package/dist/claude-cli/vendor/ripgrep/COPYING +3 -0
  21. package/dist/claude-cli/vendor/ripgrep/arm64-darwin/rg +0 -0
  22. package/dist/claude-cli/vendor/ripgrep/arm64-darwin/ripgrep.node +0 -0
  23. package/dist/claude-cli/vendor/ripgrep/arm64-linux/rg +0 -0
  24. package/dist/claude-cli/vendor/ripgrep/arm64-linux/ripgrep.node +0 -0
  25. package/dist/claude-cli/vendor/ripgrep/x64-darwin/rg +0 -0
  26. package/dist/claude-cli/vendor/ripgrep/x64-darwin/ripgrep.node +0 -0
  27. package/dist/claude-cli/vendor/ripgrep/x64-linux/rg +0 -0
  28. package/dist/claude-cli/vendor/ripgrep/x64-linux/ripgrep.node +0 -0
  29. package/dist/claude-cli/vendor/ripgrep/x64-win32/rg.exe +0 -0
  30. package/dist/claude-cli/vendor/ripgrep/x64-win32/ripgrep.node +0 -0
  31. package/dist/gateway-models.d.ts +24 -0
  32. package/dist/gateway-models.js +93 -0
  33. package/dist/gateway-models.js.map +1 -0
  34. package/dist/index.d.ts +170 -1157
  35. package/dist/index.js +9373 -5135
  36. package/dist/index.js.map +1 -1
  37. package/dist/logger-DDBiMOOD.d.ts +24 -0
  38. package/dist/posthog-api.d.ts +40 -0
  39. package/dist/posthog-api.js +175 -0
  40. package/dist/posthog-api.js.map +1 -0
  41. package/dist/server/agent-server.d.ts +41 -0
  42. package/dist/server/agent-server.js +10503 -0
  43. package/dist/server/agent-server.js.map +1 -0
  44. package/dist/server/bin.d.ts +1 -0
  45. package/dist/server/bin.js +10558 -0
  46. package/dist/server/bin.js.map +1 -0
  47. package/dist/types.d.ts +129 -0
  48. package/dist/types.js +1 -0
  49. package/dist/types.js.map +1 -0
  50. package/package.json +65 -13
  51. package/src/acp-extensions.ts +98 -16
  52. package/src/adapters/acp-connection.ts +494 -0
  53. package/src/adapters/base-acp-agent.ts +150 -0
  54. package/src/adapters/claude/claude-agent.ts +596 -0
  55. package/src/adapters/claude/conversion/acp-to-sdk.ts +102 -0
  56. package/src/adapters/claude/conversion/sdk-to-acp.ts +571 -0
  57. package/src/adapters/claude/conversion/tool-use-to-acp.ts +618 -0
  58. package/src/adapters/claude/hooks.ts +64 -0
  59. package/src/adapters/claude/mcp/tool-metadata.ts +102 -0
  60. package/src/adapters/claude/permissions/permission-handlers.ts +433 -0
  61. package/src/adapters/claude/permissions/permission-options.ts +103 -0
  62. package/src/adapters/claude/plan/utils.ts +56 -0
  63. package/src/adapters/claude/questions/utils.ts +92 -0
  64. package/src/adapters/claude/session/commands.ts +38 -0
  65. package/src/adapters/claude/session/mcp-config.ts +37 -0
  66. package/src/adapters/claude/session/models.ts +12 -0
  67. package/src/adapters/claude/session/options.ts +236 -0
  68. package/src/adapters/claude/tool-meta.ts +143 -0
  69. package/src/adapters/claude/tools.ts +53 -688
  70. package/src/adapters/claude/types.ts +61 -0
  71. package/src/adapters/codex/spawn.ts +130 -0
  72. package/src/agent.ts +96 -587
  73. package/src/execution-mode.ts +43 -0
  74. package/src/gateway-models.ts +135 -0
  75. package/src/index.ts +79 -0
  76. package/src/otel-log-writer.test.ts +105 -0
  77. package/src/otel-log-writer.ts +94 -0
  78. package/src/posthog-api.ts +75 -235
  79. package/src/resume.ts +115 -0
  80. package/src/sagas/apply-snapshot-saga.test.ts +690 -0
  81. package/src/sagas/apply-snapshot-saga.ts +88 -0
  82. package/src/sagas/capture-tree-saga.test.ts +892 -0
  83. package/src/sagas/capture-tree-saga.ts +141 -0
  84. package/src/sagas/resume-saga.test.ts +558 -0
  85. package/src/sagas/resume-saga.ts +332 -0
  86. package/src/sagas/test-fixtures.ts +250 -0
  87. package/src/server/agent-server.test.ts +220 -0
  88. package/src/server/agent-server.ts +748 -0
  89. package/src/server/bin.ts +88 -0
  90. package/src/server/jwt.ts +65 -0
  91. package/src/server/schemas.ts +47 -0
  92. package/src/server/types.ts +13 -0
  93. package/src/server/utils/retry.test.ts +122 -0
  94. package/src/server/utils/retry.ts +61 -0
  95. package/src/server/utils/sse-parser.test.ts +93 -0
  96. package/src/server/utils/sse-parser.ts +46 -0
  97. package/src/session-log-writer.test.ts +140 -0
  98. package/src/session-log-writer.ts +137 -0
  99. package/src/test/assertions.ts +114 -0
  100. package/src/test/controllers/sse-controller.ts +107 -0
  101. package/src/test/fixtures/api.ts +111 -0
  102. package/src/test/fixtures/config.ts +33 -0
  103. package/src/test/fixtures/notifications.ts +92 -0
  104. package/src/test/mocks/claude-sdk.ts +251 -0
  105. package/src/test/mocks/msw-handlers.ts +48 -0
  106. package/src/test/setup.ts +114 -0
  107. package/src/test/wait.ts +41 -0
  108. package/src/tree-tracker.ts +173 -0
  109. package/src/types.ts +54 -137
  110. package/src/utils/acp-content.ts +58 -0
  111. package/src/utils/async-mutex.test.ts +104 -0
  112. package/src/utils/async-mutex.ts +31 -0
  113. package/src/utils/common.ts +15 -0
  114. package/src/utils/gateway.ts +9 -6
  115. package/src/utils/logger.ts +0 -30
  116. package/src/utils/streams.ts +220 -0
  117. package/CLAUDE.md +0 -331
  118. package/src/adapters/claude/claude.ts +0 -1947
  119. package/src/adapters/claude/mcp-server.ts +0 -810
  120. package/src/adapters/claude/utils.ts +0 -267
  121. package/src/adapters/connection.ts +0 -95
  122. package/src/file-manager.ts +0 -273
  123. package/src/git-manager.ts +0 -577
  124. package/src/schemas.ts +0 -241
  125. package/src/session-store.ts +0 -259
  126. package/src/task-manager.ts +0 -163
  127. package/src/todo-manager.ts +0 -180
  128. package/src/tools/registry.ts +0 -134
  129. package/src/tools/types.ts +0 -133
  130. package/src/utils/tapped-stream.ts +0 -60
  131. package/src/worktree-manager.ts +0 -974
package/LICENSE CHANGED
@@ -3,7 +3,7 @@ Business Source License 1.1
3
3
  Parameters
4
4
 
5
5
  Licensor: PostHog Inc.
6
- Licensed Work: Array & Posthog Agent package
6
+ Licensed Work: Twig & Posthog Agent package
7
7
  The Licensed Work is © 2025 PostHog Inc.
8
8
 
9
9
  Change Date: None (the Licensed Work will remain under this License indefinitely unless Licensor specifies otherwise)
package/README.md CHANGED
@@ -1,265 +1,267 @@
1
- # PostHog Agent SDK
1
+ # @posthog/agent
2
+
3
+ The core runtime for PostHog cloud runs. Provides two things: an **Agent SDK** for running AI agents against PostHog tasks, and an **AgentServer** CLI that hosts the agent inside cloud sandboxes. Both are built on the [Agent Client Protocol (ACP)](https://github.com/anthropics/agent-client-protocol) for standardized agent ↔ client communication.
4
+
5
+ ## Architecture
6
+
7
+ ```text
8
+ ┌──────────────────────────────────────────────────────────────────┐
9
+ │ Client (Twig IDE or local CLI) │
10
+ │ connects via SSE/JSON-RPC (cloud) or in-process streams (local)│
11
+ └────────────────────┬─────────────────────────────────────────────┘
12
+
13
+ ┌──────────▼──────────┐
14
+ │ AgentServer │ (cloud only — Hono HTTP server)
15
+ │ GET /events (SSE) │
16
+ │ POST /command │
17
+ └──────────┬──────────┘
18
+
19
+ ┌──────────▼──────────┐
20
+ │ ACP Connection │ createAcpConnection()
21
+ │ (ndJson streams) │
22
+ │ │
23
+ │ ┌── tap ──┐ │ both directions intercepted for:
24
+ │ │ logging │ │ • SessionLogWriter (OTEL / S3)
25
+ │ │ SSE │ │ • SSE broadcast to clients
26
+ │ └─────────┘ │
27
+ └──────────┬──────────┘
28
+
29
+ ┌────────────┼────────────┐
30
+ ▼ ▼
31
+ ┌─────────────┐ ┌─────────────┐
32
+ │ Claude │ │ Codex │
33
+ │ Adapter │ │ Adapter │
34
+ │ │ │ │
35
+ │ ClaudeAcp- │ │ spawnCodex- │
36
+ │ Agent │ │ Process() │
37
+ │ (in-process)│ │ (subprocess)│
38
+ └──────┬──────┘ └──────┬──────┘
39
+ │ │
40
+ ▼ ▼
41
+ Claude Agent SDK codex-acp binary
42
+ query() stdin/stdout
43
+ ```
2
44
 
3
- TypeScript agent framework that wraps the Claude Agent SDK for PostHog's Array desktop app. Features a Git-based task execution system that stores task artifacts alongside your code.
45
+ ## Design decisions
4
46
 
5
- ## Quick Start
47
+ ### Why ACP?
6
48
 
7
- ```bash
8
- pnpm install
9
- pnpm run example
10
- ```
49
+ ACP is a standard protocol for agent ↔ client communication over ndJson streams. Using it gives us two things:
11
50
 
12
- ## Key Features
51
+ 1. **Any ACP-compatible client can connect** — the protocol is the contract, not our code.
52
+ 2. **Clean separation** — the agent adapter knows nothing about HTTP, and the server knows nothing about Claude/Codex. They communicate through typed streams.
13
53
 
14
- - **Git-Based Task Execution**: Plans and artifacts stored in `.posthog/` folders and committed to Git
15
- - **PostHog Integration**: Fetches existing tasks from PostHog API
16
- - **3-Phase Execution**: Research → Plan → Build with automatic progression
17
- - **Branch Management**: Automatic branch creation for planning and implementation
18
- - **Progress Tracking**: Execution status stored in PostHog `TaskRun` records for easy polling
54
+ ### Cloud vs local
19
55
 
20
- ## Usage
56
+ The same ACP agent runs in both contexts. The difference is how it's connected:
21
57
 
22
- ```typescript
23
- import { Agent, PermissionMode } from '@posthog/agent';
24
- import type { AgentEvent } from '@posthog/agent';
58
+ **Cloud (AgentServer):** The agent runs inside a sandbox. `AgentServer` is an HTTP server (Hono) that wraps the ACP connection. Clients connect via `GET /events` (SSE) and `POST /command` (JSON-RPC). Authentication uses JWT tokens (RS256) — the sandbox holds a public key, PostHog Django holds the private key. In background mode, the server auto-starts, prompts the agent with the task description, and signals completion via the PostHog API. In interactive mode, it stays open for conversation.
25
59
 
26
- const agent = new Agent({
27
- workingDirectory: "/path/to/repo",
28
- posthogApiUrl: "https://app.posthog.com",
29
- posthogApiKey: process.env.POSTHOG_API_KEY, // Used for both API and MCP
30
- posthogProjectId: 1,
31
- onEvent: (event) => {
32
- // Streamed updates for responsive UIs
33
- if (event.type !== 'token') {
34
- handleLiveEvent(event);
35
- }
36
- },
37
- });
38
-
39
- // Run a task
40
- const taskId = "task_abc123";
41
- const task = await agent.getPostHogClient()?.fetchTask(taskId);
42
-
43
- await agent.runTask(task, {
44
- repositoryPath: "/path/to/repo",
45
- permissionMode: PermissionMode.ACCEPT_EDITS,
46
- isCloudMode: false,
47
- createPR: true, // Optional: create PR after build. This setting has no effect if running in cloud mode.
48
- autoProgress: true,
49
- });
50
- ```
60
+ **Local (Twig desktop):** The agent runs in-process. Twig calls `createAcpConnection()` directly — no HTTP server, no JWT. The bidirectional ACP streams connect client ↔ agent within the same process.
51
61
 
52
- For local MCP development:
62
+ **TreeTracker** handles the bridge between these contexts: it captures the git working tree as snapshots (tree hash + file archive) so work can be transferred between cloud and local. This enables the "hand off" flow — start locally, continue in cloud, or vice versa. Tree snapshots use the Saga pattern (`src/sagas/`) for atomic operations with automatic rollback on failure.
53
63
 
54
- ```typescript
55
- const agent = new Agent({
56
- workingDirectory: "/path/to/repo",
57
- posthogMcpUrl: 'http://localhost:8787/mcp',
58
- });
59
- ```
64
+ ### Permission modes
60
65
 
61
- ## Task Execution
66
+ Four modes defined in `src/execution-mode.ts`:
62
67
 
63
- Each task execution creates Git branches and follows a 3-phase approach:
68
+ | Mode | ID | Behavior |
69
+ | ------------------- | ------------------- | --------------------------------------------------------------- |
70
+ | Always ask | `default` | Prompts for permission on first use of each tool |
71
+ | Accept edits | `acceptEdits` | Auto-approves file write tools for the session |
72
+ | Plan mode | `plan` | Read-only — the agent can analyze but not modify files |
73
+ | Bypass permissions | `bypassPermissions` | Auto-approves everything (hidden when running as root) |
64
74
 
65
- 1. **Research Phase**: Analyzes the codebase and may generate clarifying questions
66
- 2. **Planning Phase**: Creates an implementation plan in `.posthog/{id}/plan.md` on branch `posthog/task-{id}-planning`
67
- 3. **Build Phase**: Implements code changes on branch `posthog/task-{id}-implementation`
75
+ In cloud background mode, permissions are always auto-approved. In interactive mode, the permission system is active and configurable per session. Tool categorization lives in `src/adapters/claude/tools.ts` — each tool belongs to a group (read, write, bash, search, web, agent) and modes whitelist groups.
68
76
 
69
- ## File System
77
+ ## ACP connection layer
70
78
 
71
- ```
72
- your-repo/
73
- ├── .posthog/
74
- │ ├── README.md
75
- │ ├── .gitignore
76
- │ └── {task-id}/
77
- │ ├── plan.md
78
- │ ├── questions.json (if research phase generated questions)
79
- │ └── context.md (optional)
80
- └── (your code)
81
- ```
79
+ `createAcpConnection()` in `src/adapters/acp-connection.ts` is the heart of the package. It's a factory that returns a `{ clientStreams, cleanup }` object — a pair of ndJson `ReadableStream`/`WritableStream` that the caller uses to speak ACP.
82
80
 
83
- ## Progress Updates
81
+ Internally it does three things:
84
82
 
85
- Progress for each task execution is persisted to PostHog's `TaskRun` model, so UIs can poll for updates without relying on streaming hooks:
83
+ 1. **Creates bidirectional streams** — two pairs of `(readable, writable)` using `createBidirectionalStreams()`. One pair for the agent side, one for the client side, cross-wired so writes on one appear as reads on the other.
86
84
 
87
- ```typescript
88
- const agent = new Agent({
89
- workingDirectory: repoPath,
90
- posthogApiUrl: "https://app.posthog.com",
91
- posthogApiKey: process.env.POSTHOG_KEY,
92
- posthogProjectId: 1,
93
- });
94
-
95
- const poller = setInterval(async () => {
96
- const client = agent.getPostHogClient();
97
- const runs = await client?.listTaskRuns(taskId);
98
- const latestRun = runs?.sort((a, b) =>
99
- new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
100
- )[0];
101
-
102
- if (latestRun) {
103
- // Fetch logs from S3 using presigned URL
104
- const logs = await client?.fetchTaskRunLogs(latestRun);
105
- renderProgress(latestRun.status, logs || []);
106
- }
107
- }, 3000);
108
-
109
- try {
110
- await agent.runTask(task, { repositoryPath: repoPath });
111
- } finally {
112
- clearInterval(poller);
113
- }
114
-
115
- // Live stream still available through the onEvent hook
116
- function handleLiveEvent(event: AgentEvent) {
117
- switch (event.type) {
118
- case 'status':
119
- // optimistic UI update
120
- break;
121
- case 'error':
122
- notifyError(event.message);
123
- break;
124
- default:
125
- break;
126
- }
127
- }
128
- ```
85
+ 2. **Taps both directions for logging** — if a `logWriter` and `taskRunId` are provided, both the agent→client and client→agent writables are wrapped with `createTappedWritableStream`. Every ndJson line that flows through is appended to the `SessionLogWriter` buffer. This is transparent to both ends.
129
86
 
130
- > Prefer streaming updates? Pass an `onEvent` handler when constructing the agent to keep receiving real-time events while progress is also written to PostHog.
87
+ 3. **Connects the adapter** — for Claude, it instantiates `ClaudeAcpAgent` and wires it to the agent-side streams via `AgentSideConnection`. For Codex, it spawns a subprocess and pipes the client-side streams to the process's stdin/stdout.
131
88
 
132
- ## Requirements
89
+ The Claude and Codex paths differ significantly:
133
90
 
134
- - PNPM
135
- - Git repository
136
- - PostHog API access
137
- - Claude API access via `@anthropic-ai/claude-agent-sdk`
91
+ **Claude (in-process):** The `AgentSideConnection` calls methods on `ClaudeAcpAgent` directly. The agent implements the full ACP `Agent` interface: `initialize`, `newSession`, `prompt`, `cancel`, etc. Under the hood, `prompt()` creates a Claude Agent SDK `Query` and processes messages in a loop, converting between ACP and SDK formats using the `src/adapters/claude/conversion/` module.
138
92
 
139
- ## Configuration Options
93
+ **Codex (subprocess):** There's no `AgentSideConnection` — the `codex-acp` binary speaks ACP natively on stdin/stdout. The connection layer adds `TransformStream` filters on both directions to: suppress noisy `session/update` messages during session loading, inject `_posthog/sdk_session` notifications, filter model lists to allowed IDs, and sync reasoning effort config before prompts.
140
94
 
141
- You can customize behavior using `TaskExecutionOptions`:
95
+ ## AgentServer
142
96
 
143
- ```ts
144
- await agent.runTask(task, {
145
- repositoryPath: "/path/to/repo",
146
- permissionMode: PermissionMode.ACCEPT_EDITS, // or PLAN, DEFAULT, BYPASS
147
- isCloudMode: false, // local execution with pauses between phases
148
- autoProgress: true, // automatically progress through phases
149
- queryOverrides: {
150
- model: 'claude-sonnet-4-5-20250929',
151
- temperature: 0.7
152
- }
153
- });
97
+ `AgentServer` (`src/server/agent-server.ts`) wraps an ACP connection in an HTTP server for cloud sandbox execution. It manages a single `ActiveSession` at a time.
98
+
99
+ ### Session initialization flow
100
+
101
+ ```text
102
+ start()
103
+
104
+ ├─ Hono HTTP server starts on configured port
105
+
106
+ └─ autoInitializeSession()
107
+
108
+ ├─ Creates synthetic JwtPayload from CLI config
109
+ ├─ configureEnvironment() — sets ANTHROPIC_BASE_URL, OPENAI_BASE_URL, etc.
110
+ │ pointing at the PostHog LLM gateway
111
+ ├─ Creates TreeTracker, SessionLogWriter, PostHogAPIClient
112
+ ├─ createAcpConnection() — sets up ACP streams with log tapping
113
+
114
+ ├─ Wraps client streams with a SECOND tap layer (NdJsonTap)
115
+ │ that broadcasts every ACP message to SSE clients
116
+
117
+ ├─ ClientSideConnection.initialize() — ACP handshake
118
+ ├─ ClientSideConnection.newSession() — starts agent session
119
+
120
+ └─ sendInitialTaskMessage()
121
+ ├─ Fetches task from PostHog API
122
+ ├─ Sends task.description as first prompt
123
+ └─ Background mode: signals completion/failure via API
124
+ Interactive mode: stays open
154
125
  ```
155
126
 
156
- ## Fine-Grained Permissions
127
+ The two tapping layers are distinct. The inner tap (from `createAcpConnection`) persists to logs. The outer tap (in `AgentServer`) broadcasts to SSE. This means log persistence works for both cloud and local, while SSE broadcast is cloud-only.
157
128
 
158
- For advanced control over agent actions, you can provide a `canUseTool` callback that intercepts every tool use during the **build phase** (for task execution) or **direct run calls**. This allows you to implement custom approval flows, logging, or restrictions.
129
+ ### HTTP endpoints
159
130
 
160
- See the [Claude Agent SDK Permissions docs](https://docs.claude.com/en/api/agent-sdk/permissions) for more details.
131
+ | Method | Path | Auth | Description |
132
+ | ------ | ---------- | ---- | -------------------------------------------------------- |
133
+ | `GET` | `/health` | None | Returns `{ status: "ok", hasSession }` |
134
+ | `GET` | `/events` | JWT | SSE stream — all ACP notifications broadcast in real time |
135
+ | `POST` | `/command` | JWT | JSON-RPC commands: `user_message`, `cancel`, `close` |
161
136
 
162
- ### Per-Agent Configuration
137
+ JWT validation (`src/server/jwt.ts`) uses RS256 with a configurable public key. The JWT payload carries `task_id`, `run_id`, `team_id`, `user_id`, `distinct_id`, and `mode`. The audience must be `posthog:sandbox_connection`.
163
138
 
164
- Apply the same permission hook to all task executions and direct runs:
139
+ ### Commands flow through ACP
165
140
 
166
- ```typescript
167
- import { Agent } from '@posthog/agent';
168
- import type { PermissionResult } from '@posthog/agent';
141
+ When `POST /command` receives a `user_message`, it doesn't handle it directly — it calls `clientConnection.prompt()` on the ACP `ClientSideConnection`, which sends a `session/prompt` message through the ACP streams to the agent. Similarly, `cancel` sends `session/cancel`. This means all commands follow the same path as in-process calls from Twig, with the HTTP layer just being a thin translation.
169
142
 
170
- const agent = new Agent({
171
- workingDirectory: "/path/to/repo",
172
- posthogApiUrl: "https://app.posthog.com",
173
- posthogApiKey: process.env.POSTHOG_API_KEY,
174
- posthogProjectId: 1,
175
- canUseTool: async (toolName, input, { signal, suggestions }) => {
176
- // Block destructive commands
177
- if (toolName === 'Bash' && input.command?.includes('rm -rf')) {
178
- return {
179
- behavior: 'deny',
180
- message: 'Destructive rm -rf commands are not allowed',
181
- interrupt: true
182
- };
183
- }
184
-
185
- // Allow everything else
186
- return {
187
- behavior: 'allow',
188
- updatedInput: input
189
- };
190
- }
191
- });
143
+ ### Auto-approval in cloud mode
144
+
145
+ The `AgentServer` provides a `requestPermission` callback to the `ClientSideConnection` that always selects the "allow" option. In background mode this is necessary (no human to ask). In interactive mode it currently does the same, with a TODO for future per-tool approval via SSE round-trips.
146
+
147
+ ### Tree state capture
148
+
149
+ After every `Write` or `Edit` tool call, the server captures a git tree snapshot via `TreeTracker` and broadcasts it as a `_posthog/tree_snapshot` SSE event. A final snapshot is captured during session cleanup. This is how the client knows what files changed and can restore state for cloud↔local handoff.
150
+
151
+ ### CLI
152
+
153
+ ```bash
154
+ npx agent-server \
155
+ --port 3001 \
156
+ --mode interactive \
157
+ --repositoryPath /path/to/repo \
158
+ --taskId task_123 \
159
+ --runId run_456
192
160
  ```
193
161
 
194
- ### Per-Task Configuration
162
+ Required environment variables (validated by zod in `src/server/bin.ts`):
163
+
164
+ - `JWT_PUBLIC_KEY` — RS256 public key for sandbox auth
165
+ - `POSTHOG_API_URL` — PostHog API base URL
166
+ - `POSTHOG_PERSONAL_API_KEY` — API key for PostHog requests
167
+ - `POSTHOG_PROJECT_ID` — numeric project ID
195
168
 
196
- Override permissions for specific tasks (only applied during build phase):
169
+ ## Agent SDK
170
+
171
+ The `Agent` class (`src/agent.ts`) is the entrypoint for local/programmatic usage. It handles LLM gateway configuration, log writer setup, and model filtering — then delegates to `createAcpConnection()`.
197
172
 
198
173
  ```typescript
199
- await agent.runTask(task, {
174
+ import { Agent } from "@posthog/agent/agent"
175
+
176
+ const agent = new Agent({
177
+ posthog: {
178
+ apiUrl: "https://app.posthog.com",
179
+ getApiKey: () => process.env.POSTHOG_PERSONAL_API_KEY!,
180
+ projectId: 12345,
181
+ },
182
+ })
183
+
184
+ // Run a task — returns an ACP connection with bidirectional streams
185
+ const connection = await agent.run(taskId, runId, {
200
186
  repositoryPath: "/path/to/repo",
201
- permissionMode: PermissionMode.DEFAULT,
202
- canUseTool: async (toolName, input, { signal, suggestions }) => {
203
- // Custom approval UI
204
- const approved = await showApprovalDialog({
205
- tool: toolName,
206
- input: input,
207
- suggestions: suggestions // Permission updates for "always allow"
208
- });
209
-
210
- if (approved.action === 'allow') {
211
- return {
212
- behavior: 'allow',
213
- updatedInput: approved.modifiedInput || input,
214
- updatedPermissions: approved.rememberChoice ? suggestions : undefined
215
- };
216
- }
217
-
218
- return {
219
- behavior: 'deny',
220
- message: approved.reason || 'User denied permission',
221
- interrupt: !approved.continueWithGuidance
222
- };
223
- }
224
- });
187
+ adapter: "claude", // or "codex"
188
+ })
189
+
190
+ // Attach a PR to the task run output
191
+ await agent.attachPullRequestToTask(taskId, prUrl)
192
+
193
+ // Cleanup: flush logs and release resources
194
+ await agent.cleanup()
225
195
  ```
226
196
 
227
- ### Direct Run Example
197
+ Key difference from `AgentServer`: the SDK returns raw ACP streams for the caller to manage. There's no HTTP layer, no SSE broadcasting, and no auto-prompting. The caller is responsible for creating a `ClientSideConnection`, running the ACP handshake, and sending prompts. This is what Twig does when running agents locally.
228
198
 
229
- For one-off queries with custom permissions:
199
+ For Codex adapters, `agent.run()` also fetches available models from the PostHog gateway and filters to OpenAI-compatible models, passing the allowed set to the ACP connection for model list filtering.
230
200
 
231
- ```typescript
232
- const result = await agent.run("Fix the authentication bug", {
233
- repositoryPath: "/path/to/repo",
234
- permissionMode: PermissionMode.DEFAULT,
235
- canUseTool: async (toolName, input, { signal }) => {
236
- console.log(`Agent wants to use ${toolName}:`, input);
237
-
238
- // Simple approval logic
239
- if (toolName === 'Write' || toolName === 'Edit') {
240
- const allowedFiles = ['src/', 'tests/'];
241
- const filePath = input.file_path || input.path;
242
- const isAllowed = allowedFiles.some(prefix => filePath?.startsWith(prefix));
243
-
244
- if (!isAllowed) {
245
- return {
246
- behavior: 'deny',
247
- message: `Can only modify files in: ${allowedFiles.join(', ')}`
248
- };
249
- }
250
- }
251
-
252
- return { behavior: 'allow', updatedInput: input };
253
- }
254
- });
201
+ ## Log pipeline and session resume
202
+
203
+ Logs serve two purposes: real-time observability and session resume. Every ACP message that flows through the tapped streams is persisted, creating a complete record of the conversation — user messages, agent responses, tool calls, tool results, tree snapshots, and metadata events. This record is the single source of truth for resuming a session from any point.
204
+
205
+ ### Writing logs
206
+
207
+ `SessionLogWriter` (`src/session-log-writer.ts`) is a per-session multiplexer that buffers raw ndJson lines. On flush (auto-scheduled 500ms after writes, or explicit), it dispatches to whichever backend is configured:
208
+
209
+ - **OTEL** (`src/otel-log-writer.ts`) — preferred path. Creates an OpenTelemetry `LoggerProvider` per session with resource attributes (`task_id`, `run_id`, `device_type`) set once and indexed via `resource_fingerprint`. Each ndJson line is emitted as an OTEL log record with an `event_type` attribute (the ACP method name) and exported via OTLP HTTP to PostHog's `/i/v1/agent-logs` endpoint. Batch flush interval defaults to 500ms.
210
+ - **Legacy S3** — falls back to `PostHogAPIClient.appendTaskRunLog()`, which POSTs batched `StoredNotification` entries to the Django API. The API stores them as the task run's `log_url`.
211
+
212
+ Both backends can be active simultaneously — OTEL for fast indexed queries, S3 for full log download.
213
+
214
+ ### Resuming from logs
215
+
216
+ When a session needs to continue (e.g. cloud↔local handoff, or recovering from a crash), `resumeFromLog()` in `src/resume.ts` reconstructs the agent's state from the persisted log. This is implemented as a `ResumeSaga` (`src/sagas/resume-saga.ts`) with the following steps:
217
+
218
+ ```text
219
+ 1. fetch_task_run → GET /api/.../runs/{runId}/ to find the log_url
220
+ 2. fetch_logs → Download all StoredNotification entries
221
+ 3. find_snapshot → Scan backwards for latest _posthog/tree_snapshot
222
+ 4. apply_snapshot → Download archive from snapshot URL, restore working tree
223
+ 5. rebuild_conversation → Walk log entries to reconstruct conversation turns
224
+ 6. find_device → Scan backwards for last device info (local vs cloud)
255
225
  ```
256
226
 
257
- ### Available Tool Names
227
+ The conversation rebuild (`rebuildConversation`) walks the log entries and reassembles turns from ACP `session/update` notifications:
228
+
229
+ - `user_message` / `user_message_chunk` → start a new user turn
230
+ - `agent_message_chunk` → accumulate into the current assistant turn (merging consecutive text blocks)
231
+ - `tool_call` / `tool_call_update` → track tool calls with their inputs
232
+ - `tool_result` → match results back to tool calls by `toolCallId`
233
+
234
+ The result is a `ResumeState` containing the conversation history as `ConversationTurn[]`, the latest tree snapshot, and metadata. This feeds into the ACP `session/load` or `_posthog/session/resume` methods on the Claude adapter, which initializes a new Claude SDK query with the rebuilt context.
235
+
236
+ Snapshot application can fail without aborting the resume — if the archive URL is missing or the download fails, the saga logs a warning and continues with just the conversation history. The `snapshotApplied` flag in the result tells the caller whether files were actually restored.
237
+
238
+ ## ACP extensions
239
+
240
+ ACP defines standard methods like `session/prompt`, `session/update`, and `session/cancel`. PostHog extends the protocol with custom notifications in the `_posthog/` namespace (`src/acp-extensions.ts`). These serve three purposes:
241
+
242
+ **Session lifecycle** — events that track the run from start to finish. Clients use these to update UI state (show progress, enable/disable controls, display completion). The Django API uses `task_complete` to mark the run as finished.
243
+
244
+ - `_posthog/run_started` — `{ sessionId, runId, taskId? }` — session initialized and ready
245
+ - `_posthog/task_complete` — `{ sessionId, taskId }` — agent finished (success or end-turn)
246
+ - `_posthog/error` — `{ sessionId, message, error? }` — unrecoverable error
247
+ - `_posthog/status` — `{ sessionId, status, message? }` — progress updates
248
+ - `_posthog/sdk_session` — `{ taskRunId, sessionId, adapter }` — maps the ACP session to a task run and adapter type (emitted once per session, used by clients to know which adapter is active)
249
+
250
+ **State synchronization** — events that keep the client's view of the agent's state in sync. These are essential for the cloud↔local handoff flow and for the client to render accurate UI.
251
+
252
+ - `_posthog/branch_created` — `{ branch }` — agent created a git branch (client can update branch display)
253
+ - `_posthog/tree_snapshot` — `{ treeHash, baseCommit, changes, ... }` — git working tree captured as a snapshot. Contains the tree hash, base commit, file change list, and optionally an archive URL. This is the key event for session resume — the resume saga scans backwards for the latest snapshot to restore files
254
+ - `_posthog/mode_change` — `{ mode, previous_mode }` — permission mode changed (client updates mode selector)
255
+ - `_posthog/compact_boundary` — `{ sessionId, timestamp }` — marks where context compaction occurred, so the client knows the conversation was summarized at this point
256
+ - `_posthog/task_notification` — `{ sessionId, type, message?, data? }` — generic extensible notification for adapter-specific events
257
+
258
+ **Client→agent commands** — notifications that flow from client to agent (via `POST /command` in cloud, or direct ACP in local). These are the "verbs" the client can send outside of `session/prompt`.
259
+
260
+ - `_posthog/user_message` — `{ content }` — user typed a message (translated to `session/prompt`)
261
+ - `_posthog/cancel` — cancel the current operation (translated to `session/cancel`)
262
+ - `_posthog/close` — close the session and clean up
263
+ - `_posthog/session/resume` — `{ sessionId, fromSnapshot? }` — request to resume a previous session (triggers the resume flow on the Claude adapter)
258
264
 
259
- The `canUseTool` callback receives one of these tool names:
260
- - **Read-only**: `Read`, `Glob`, `Grep`, `WebFetch`, `WebSearch`, `ListMcpResources`, `ReadMcpResource`
261
- - **Modifications**: `Write`, `Edit`, `NotebookEdit`
262
- - **Execution**: `Bash`, `BashOutput`, `KillBash`, `Task`
263
- - **Planning**: `ExitPlanMode`, `TodoWrite`
265
+ **Debug** — operational visibility without polluting the ACP conversation.
264
266
 
265
- **Note**: Research and planning phases have fixed, read-only tool sets. The `canUseTool` hook only applies to the build phase and direct run calls.
267
+ - `_posthog/console` — `{ sessionId, level, message }` — structured debug/info/warn/error log from the agent internals
@@ -0,0 +1,21 @@
1
+ import { PlanEntry, ToolCall, ToolCallUpdate } from '@agentclientprotocol/sdk';
2
+ import { ToolUseBlock, ToolResultBlockParam, WebSearchToolResultBlockParam } from '@anthropic-ai/sdk/resources';
3
+ import { BetaWebSearchToolResultBlockParam, BetaWebFetchToolResultBlockParam, BetaCodeExecutionToolResultBlockParam, BetaBashCodeExecutionToolResultBlockParam, BetaTextEditorCodeExecutionToolResultBlockParam, BetaRequestMCPToolResultBlockParam, BetaToolSearchToolResultBlockParam } from '@anthropic-ai/sdk/resources/beta.mjs';
4
+ import { L as Logger } from '../../../logger-DDBiMOOD.js';
5
+ import '../../../types.js';
6
+
7
+ type ToolInfo = Pick<ToolCall, "title" | "kind" | "content" | "locations">;
8
+ declare function toolInfoFromToolUse(toolUse: Pick<ToolUseBlock, "name" | "input">, cachedFileContent: {
9
+ [key: string]: string;
10
+ }, logger?: Logger): ToolInfo;
11
+ declare function toolUpdateFromToolResult(toolResult: ToolResultBlockParam | BetaWebSearchToolResultBlockParam | BetaWebFetchToolResultBlockParam | WebSearchToolResultBlockParam | BetaCodeExecutionToolResultBlockParam | BetaBashCodeExecutionToolResultBlockParam | BetaTextEditorCodeExecutionToolResultBlockParam | BetaRequestMCPToolResultBlockParam | BetaToolSearchToolResultBlockParam, toolUse: Pick<ToolUseBlock, "name" | "input"> | undefined): Pick<ToolCallUpdate, "title" | "content" | "locations">;
12
+ type ClaudePlanEntry = {
13
+ content: string;
14
+ status: "pending" | "in_progress" | "completed";
15
+ activeForm: string;
16
+ };
17
+ declare function planEntries(input: {
18
+ todos: ClaudePlanEntry[];
19
+ }): PlanEntry[];
20
+
21
+ export { type ClaudePlanEntry, planEntries, toolInfoFromToolUse, toolUpdateFromToolResult };