@posthog/agent 2.1.115 → 2.1.120

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
1
+ import { type SetupServerApi, setupServer } from "msw/node";
2
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
3
+ import { createTestRepo, type TestRepo } from "../test/fixtures/api.js";
4
+ import { createPostHogHandlers } from "../test/mocks/msw-handlers.js";
5
+ import { AgentServer } from "./agent-server.js";
6
+
7
+ /* eslint-disable @typescript-eslint/no-explicit-any */
8
+
9
/**
 * Run payload handed to the AgentServer methods under test.
 * `task_id` and `run_id` use the same ids as the server config built in
 * `beforeEach`; `mode` is pinned to the literal type "interactive".
 */
const TEST_PAYLOAD = {
  run_id: "test-run-id",
  task_id: "test-task-id",
  team_id: 1,
  user_id: 1,
  distinct_id: "test-distinct-id",
  mode: "interactive" as const,
};
17
+
18
/** The single question carried by QUESTION_META: three license choices, each with a description. */
const LICENSE_QUESTION = {
  question: "Which license should I use?",
  options: [
    { label: "MIT", description: "Permissive license" },
    { label: "Apache 2.0", description: "Patent grant included" },
    { label: "GPL v3", description: "Copyleft license" },
  ],
};

/** Tool-call `_meta` fixture describing a "question" tool invocation with one question. */
const QUESTION_META = {
  twigToolKind: "question",
  questions: [LICENSE_QUESTION],
};
31
+
32
+ describe("Question relay", () => {
33
// Per-test state, rebuilt in `beforeEach`.
// `server` is deliberately `any` (see the eslint-disable at the top of the
// file) — presumably because the tests reach into AgentServer members that are
// not exposed through its public typing; confirm against agent-server.ts.
let repo: TestRepo;
let server: any;
let mswServer: SetupServerApi;
const port = 3098;

beforeEach(async () => {
  const apiUrl = "http://localhost:8000";

  repo = await createTestRepo("question-relay");

  // Mock the PostHog API on the same base URL the server is configured with;
  // requests without a handler are passed through untouched.
  const handlers = createPostHogHandlers({ baseUrl: apiUrl });
  mswServer = setupServer(...handlers);
  mswServer.listen({ onUnhandledRequest: "bypass" });

  // A fresh server per test; this hook only constructs it.
  server = new AgentServer({
    port,
    jwtPublicKey: "unused-in-unit-tests",
    repositoryPath: repo.path,
    apiUrl,
    apiKey: "test-api-key",
    projectId: 1,
    mode: "interactive",
    taskId: "test-task-id",
    runId: "test-run-id",
  });
});

afterEach(async () => {
  // Stop request interception first, then remove the temporary repository.
  mswServer.close();
  await repo.cleanup();
});
62
+
63
describe("isQuestionMeta", () => {
  // Inputs the guard must refuse: non-objects, a missing or non-string
  // `question`, a non-array `options`, and option items without a `label`.
  const rejectedValues: Array<[string, unknown]> = [
    ["null", null],
    ["undefined", undefined],
    ["number", 42],
    ["string", "not a question"],
    ["object without question field", { options: [] }],
    ["object with non-string question", { question: 123 }],
    ["object with non-array options", { question: "Q?", options: "bad" }],
    [
      "object with invalid option items",
      { question: "Q?", options: [{ notLabel: "x" }] },
    ],
  ];

  // Inputs the guard must accept: `options` may be omitted or empty, and an
  // option's `description` is optional.
  const acceptedValues: Array<[string, unknown]> = [
    [
      "question with options",
      {
        question: "Pick one",
        options: [{ label: "A", description: "desc" }, { label: "B" }],
      },
    ],
    ["question without options", { question: "What do you think?" }],
    ["question with empty options", { question: "Confirm?", options: [] }],
  ];

  it.each(rejectedValues)("rejects %s", (_label, value) => {
    expect(server.isQuestionMeta(value)).toBe(false);
  });

  it.each(acceptedValues)("accepts %s", (_label, value) => {
    expect(server.isQuestionMeta(value)).toBe(true);
  });
});
94
+
95
describe("getFirstQuestionMeta", () => {
  // Metas from which no first question can be taken.
  const metasWithoutQuestion: Array<[string, unknown]> = [
    ["null meta", null],
    ["undefined meta", undefined],
    ["meta without questions", { other: "field" }],
    ["meta with empty questions array", { questions: [] }],
    ["meta with non-array questions", { questions: "not-array" }],
  ];

  it.each(metasWithoutQuestion)("returns null for %s", (_label, meta) => {
    expect(server.getFirstQuestionMeta(meta)).toBeNull();
  });

  it("returns first question from valid meta", () => {
    const [expectedQuestion] = QUESTION_META.questions;

    expect(server.getFirstQuestionMeta(QUESTION_META)).toEqual(
      expectedQuestion,
    );
  });
});
111
+
112
describe("relaySlackQuestion", () => {
  /** Replaces `posthogAPI.relayMessage` on the current server with a resolved stub and returns the spy. */
  const stubRelayMessage = () =>
    vi.spyOn(server.posthogAPI, "relayMessage").mockResolvedValue(undefined);

  it("relays formatted question with options via posthogAPI", () => {
    const relaySpy = stubRelayMessage();

    server.relaySlackQuestion(TEST_PAYLOAD, QUESTION_META);

    expect(relaySpy).toHaveBeenCalledOnce();
    const [taskId, runId, message] = relaySpy.mock.calls[0];
    expect(taskId).toBe("test-task-id");
    expect(runId).toBe("test-run-id");

    // The relayed text carries the bolded question, each numbered option,
    // the first option's description and the reply hint.
    const expectedFragments = [
      "*Which license should I use?*",
      "1. *MIT*",
      "Permissive license",
      "2. *Apache 2.0*",
      "3. *GPL v3*",
      "Reply in this thread",
    ];
    for (const fragment of expectedFragments) {
      expect(message).toContain(fragment);
    }
  });

  it("sets questionRelayedToSlack flag", () => {
    stubRelayMessage();

    server.relaySlackQuestion(TEST_PAYLOAD, QUESTION_META);

    expect(server.questionRelayedToSlack).toBe(true);
  });

  it("does not relay when meta has no valid question", () => {
    const relaySpy = stubRelayMessage();
    const metaWithoutQuestions = { twigToolKind: "question" };

    server.relaySlackQuestion(TEST_PAYLOAD, metaWithoutQuestions);

    expect(server.questionRelayedToSlack).toBe(false);
    expect(relaySpy).not.toHaveBeenCalled();
  });
});
149
+
150
describe("createCloudClient requestPermission", () => {
  const ALLOW_OPTIONS = [
    { kind: "allow_once", optionId: "allow", name: "Allow" },
  ];
  const ORIGIN_ENV = "TWIG_INTERACTION_ORIGIN";

  // Snapshot the variable before every test and put it back afterwards, so a
  // value that was already set when the run started is neither lost nor
  // leaked into later tests. The nested suites only set or clear it.
  let originalOrigin: string | undefined;

  beforeEach(() => {
    originalOrigin = process.env[ORIGIN_ENV];
  });

  afterEach(() => {
    if (originalOrigin === undefined) {
      delete process.env[ORIGIN_ENV];
    } else {
      process.env[ORIGIN_ENV] = originalOrigin;
    }
  });

  describe("with TWIG_INTERACTION_ORIGIN=slack", () => {
    beforeEach(() => {
      process.env[ORIGIN_ENV] = "slack";
    });

    it("returns cancelled with relay message for question tool", async () => {
      vi.spyOn(server.posthogAPI, "relayMessage").mockResolvedValue(
        undefined,
      );
      const client = server.createCloudClient(TEST_PAYLOAD);

      const result = await client.requestPermission({
        options: ALLOW_OPTIONS,
        toolCall: { _meta: QUESTION_META },
      });

      // The request is cancelled and the accompanying message tells the
      // agent the question was relayed and must not be asked again.
      expect(result.outcome.outcome).toBe("cancelled");
      expect(result._meta?.message).toContain("relayed to the Slack thread");
      expect(result._meta?.message).toContain("Do NOT re-ask the question");
    });

    it("auto-approves non-question tools", async () => {
      const client = server.createCloudClient(TEST_PAYLOAD);

      const result = await client.requestPermission({
        options: ALLOW_OPTIONS,
        toolCall: { _meta: { twigToolKind: "bash" } },
      });

      expect(result.outcome.outcome).toBe("selected");
    });

    it("auto-approves tools without meta", async () => {
      const client = server.createCloudClient(TEST_PAYLOAD);

      const result = await client.requestPermission({
        options: ALLOW_OPTIONS,
        toolCall: { _meta: null },
      });

      expect(result.outcome.outcome).toBe("selected");
    });
  });

  describe("without TWIG_INTERACTION_ORIGIN", () => {
    beforeEach(() => {
      delete process.env[ORIGIN_ENV];
    });

    it("auto-approves question tools (no Slack relay)", async () => {
      const client = server.createCloudClient(TEST_PAYLOAD);

      const result = await client.requestPermission({
        options: ALLOW_OPTIONS,
        toolCall: { _meta: QUESTION_META },
      });

      expect(result.outcome.outcome).toBe("selected");
    });
  });
});
220
+
221
describe("relayAgentResponse duplicate suppression", () => {
  /**
   * Stubs `posthogAPI.relayMessage` and installs a session whose log writer
   * reports `lastAgentMessage` as the most recent agent message.
   * Returns the relay spy so each test can assert on it.
   */
  const arrangeSession = (lastAgentMessage: string | undefined) => {
    const relaySpy = vi
      .spyOn(server.posthogAPI, "relayMessage")
      .mockResolvedValue(undefined);

    server.session = {
      payload: TEST_PAYLOAD,
      logWriter: {
        flush: vi.fn().mockResolvedValue(undefined),
        getLastAgentMessage: vi.fn().mockReturnValue(lastAgentMessage),
        isRegistered: vi.fn().mockReturnValue(true),
      },
    };

    return relaySpy;
  };

  it("skips relay when questionRelayedToSlack is set", async () => {
    const relaySpy = arrangeSession("agent response");

    server.questionRelayedToSlack = true;
    await server.relayAgentResponse(TEST_PAYLOAD);

    // The flag is consumed (reset) and nothing is relayed.
    expect(server.questionRelayedToSlack).toBe(false);
    expect(relaySpy).not.toHaveBeenCalled();
  });

  it("relays normally when questionRelayedToSlack is not set", async () => {
    const relaySpy = arrangeSession("agent response");

    server.questionRelayedToSlack = false;
    await server.relayAgentResponse(TEST_PAYLOAD);

    expect(relaySpy).toHaveBeenCalledWith(
      "test-task-id",
      "test-run-id",
      "agent response",
    );
  });

  it("does not relay when no agent message is available", async () => {
    // SessionLogWriter.getLastAgentMessage is declared `string | undefined`,
    // so "no message" is modelled as `undefined` rather than `null`
    // (assumes session.logWriter is a SessionLogWriter — confirm).
    const relaySpy = arrangeSession(undefined);

    server.questionRelayedToSlack = false;
    await server.relayAgentResponse(TEST_PAYLOAD);

    expect(relaySpy).not.toHaveBeenCalled();
  });
});
287
+
288
describe("sendInitialTaskMessage prompt source", () => {
  /**
   * Stubs the task and task-run lookups (the run carrying `runState` as its
   * `state`) and installs a session with a spy-backed `prompt`.
   * Returns that prompt spy.
   */
  const arrangeRun = (runState: Record<string, unknown>) => {
    vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
      id: "test-task-id",
      title: "t",
      description: "original task description",
    } as any);
    vi.spyOn(server.posthogAPI, "getTaskRun").mockResolvedValue({
      id: "test-run-id",
      task: "test-task-id",
      state: runState,
    } as any);

    const promptSpy = vi.fn().mockResolvedValue({ stopReason: "max_tokens" });
    server.session = {
      payload: TEST_PAYLOAD,
      acpSessionId: "acp-session",
      clientConnection: { prompt: promptSpy },
    };
    return promptSpy;
  };

  /** Shape of the prompt request expected for a given text. */
  const promptRequest = (text: string) => ({
    sessionId: "acp-session",
    prompt: [{ type: "text", text }],
  });

  it("uses run state initial_prompt_override when present", async () => {
    const promptSpy = arrangeRun({
      initial_prompt_override: "override instruction",
    });

    await server.sendInitialTaskMessage(TEST_PAYLOAD);

    expect(promptSpy).toHaveBeenCalledWith(
      promptRequest("override instruction"),
    );
  });

  it("falls back to task description when override is missing", async () => {
    const promptSpy = arrangeRun({});

    await server.sendInitialTaskMessage(TEST_PAYLOAD);

    expect(promptSpy).toHaveBeenCalledWith(
      promptRequest("original task description"),
    );
  });
});
343
+ });
@@ -137,6 +137,25 @@ describe("SessionLogWriter", () => {
137
137
  sessionUpdate: "agent_message",
138
138
  content: { type: "text", text: "Hello world" },
139
139
  });
140
+ expect(logWriter.getLastAgentMessage(sessionId)).toBe("Hello world");
141
+ });
142
+
143
it("tracks direct agent_message updates", async () => {
  const sessionId = "s1";
  const expectedText = "Pick MIT or Apache";
  logWriter.register(sessionId, { taskId: "t1", runId: sessionId });

  // A complete agent_message (as opposed to streamed chunks) should be
  // remembered as the session's last agent message.
  const rawUpdate = makeSessionUpdate("agent_message", {
    content: { type: "text", text: expectedText },
  });
  logWriter.appendRawLine(sessionId, rawUpdate);

  await logWriter.flush(sessionId);

  expect(logWriter.getLastAgentMessage(sessionId)).toBe(expectedText);
});
141
160
  });
142
161
 
@@ -22,6 +22,7 @@ interface ChunkBuffer {
22
22
  interface SessionState {
23
23
  context: SessionContext;
24
24
  chunkBuffer?: ChunkBuffer;
25
+ lastAgentMessage?: string;
25
26
  }
26
27
 
27
28
  export class SessionLogWriter {
@@ -50,11 +51,15 @@ export class SessionLogWriter {
50
51
 
51
52
  async flushAll(): Promise<void> {
52
53
  const sessionIds = [...this.sessions.keys()];
53
- const pendingCounts = sessionIds.map((id) => ({
54
- id,
55
- pending: this.pendingEntries.get(id)?.length ?? 0,
56
- messages: this.messageCounts.get(id) ?? 0,
57
- }));
54
+ const pendingCounts = sessionIds.map((id) => {
55
+ const session = this.sessions.get(id);
56
+ return {
57
+ taskId: session?.context.taskId,
58
+ runId: session?.context.runId,
59
+ pending: this.pendingEntries.get(id)?.length ?? 0,
60
+ messages: this.messageCounts.get(id) ?? 0,
61
+ };
62
+ });
58
63
  this.logger.info("flushAll called", {
59
64
  sessions: sessionIds.length,
60
65
  pending: pendingCounts,
@@ -73,8 +78,8 @@ export class SessionLogWriter {
73
78
  }
74
79
 
75
80
  this.logger.info("Session registered", {
76
- sessionId,
77
81
  taskId: context.taskId,
82
+ runId: context.runId,
78
83
  });
79
84
  this.sessions.set(sessionId, { context });
80
85
 
@@ -113,7 +118,11 @@ export class SessionLogWriter {
113
118
  const count = (this.messageCounts.get(sessionId) ?? 0) + 1;
114
119
  this.messageCounts.set(sessionId, count);
115
120
  if (count % 10 === 1) {
116
- this.logger.info("Messages received", { count, sessionId });
121
+ this.logger.info("Messages received", {
122
+ count,
123
+ taskId: session.context.taskId,
124
+ runId: session.context.runId,
125
+ });
117
126
  }
118
127
 
119
128
  try {
@@ -137,6 +146,11 @@ export class SessionLogWriter {
137
146
  // Non-chunk event: flush any buffered chunks first
138
147
  this.emitCoalescedMessage(sessionId, session);
139
148
 
149
+ const nonChunkAgentText = this.extractAgentMessageText(message);
150
+ if (nonChunkAgentText) {
151
+ session.lastAgentMessage = nonChunkAgentText;
152
+ }
153
+
140
154
  const entry: StoredNotification = {
141
155
  type: "notification",
142
156
  timestamp,
@@ -153,7 +167,8 @@ export class SessionLogWriter {
153
167
  }
154
168
  } catch {
155
169
  this.logger.warn("Failed to parse raw line for persistence", {
156
- sessionId,
170
+ taskId: session.context.taskId,
171
+ runId: session.context.runId,
157
172
  lineLength: line.length,
158
173
  });
159
174
  }
@@ -172,7 +187,8 @@ export class SessionLogWriter {
172
187
  const pending = this.pendingEntries.get(sessionId);
173
188
  if (!this.posthogAPI || !pending?.length) {
174
189
  this.logger.info("flush: nothing to persist", {
175
- sessionId,
190
+ taskId: session.context.taskId,
191
+ runId: session.context.runId,
176
192
  hasPosthogAPI: !!this.posthogAPI,
177
193
  pendingCount: pending?.length ?? 0,
178
194
  });
@@ -196,7 +212,8 @@ export class SessionLogWriter {
196
212
  );
197
213
  this.retryCounts.set(sessionId, 0);
198
214
  this.logger.info("Flushed session logs", {
199
- sessionId,
215
+ taskId: session.context.taskId,
216
+ runId: session.context.runId,
200
217
  entryCount: pending.length,
201
218
  });
202
219
  } catch (error) {
@@ -206,7 +223,11 @@ export class SessionLogWriter {
206
223
  if (retryCount >= SessionLogWriter.MAX_FLUSH_RETRIES) {
207
224
  this.logger.error(
208
225
  `Dropping ${pending.length} session log entries after ${retryCount} failed flush attempts`,
209
- { sessionId, error },
226
+ {
227
+ taskId: session.context.taskId,
228
+ runId: session.context.runId,
229
+ error,
230
+ },
210
231
  );
211
232
  this.retryCounts.set(sessionId, 0);
212
233
  } else {
@@ -245,6 +266,7 @@ export class SessionLogWriter {
245
266
 
246
267
  const { text, firstTimestamp } = session.chunkBuffer;
247
268
  session.chunkBuffer = undefined;
269
+ session.lastAgentMessage = text;
248
270
 
249
271
  const entry: StoredNotification = {
250
272
  type: "notification",
@@ -271,6 +293,39 @@ export class SessionLogWriter {
271
293
  }
272
294
  }
273
295
 
296
/**
 * Returns the most recent agent message text remembered for a session.
 *
 * The value is recorded when a direct `agent_message` update is appended or
 * when buffered message chunks are coalesced.
 *
 * @param sessionId - Key the session was registered under.
 * @returns The stored text, or `undefined` when the session is unknown or no
 *   agent message has been recorded for it.
 */
getLastAgentMessage(sessionId: string): string | undefined {
  const state = this.sessions.get(sessionId);
  return state?.lastAgentMessage;
}
299
+
300
/**
 * Extracts the text of a complete `agent_message` from a parsed
 * `session/update` notification.
 *
 * The text is taken from `update.content` when that is a text block with a
 * string `text`; otherwise from `update.message` when that is a string.
 * When the content is a text block, `update.message` is not consulted even
 * if the content text turns out to be blank.
 *
 * @param message - Parsed notification object.
 * @returns The trimmed text, or `null` when the notification is not an
 *   `agent_message` update or the selected text is empty after trimming.
 */
private extractAgentMessageText(
  message: Record<string, unknown>,
): string | null {
  if (message.method !== "session/update") return null;

  const update = (message.params as Record<string, unknown> | undefined)
    ?.update as Record<string, unknown> | undefined;
  if (update?.sessionUpdate !== "agent_message") return null;

  // Whitespace-only text counts as "no message".
  const trimmedOrNull = (raw: string): string | null => {
    const text = raw.trim();
    return text.length === 0 ? null : text;
  };

  const content = update.content as
    | { type?: string; text?: string }
    | undefined;
  if (content?.type === "text" && typeof content.text === "string") {
    return trimmedOrNull(content.text);
  }

  return typeof update.message === "string"
    ? trimmedOrNull(update.message)
    : null;
}
328
+
274
329
  private scheduleFlush(sessionId: string): void {
275
330
  const existing = this.flushTimeouts.get(sessionId);
276
331
  if (existing) clearTimeout(existing);
@@ -316,7 +371,12 @@ export class SessionLogWriter {
316
371
  try {
317
372
  fs.appendFileSync(logPath, `${JSON.stringify(entry)}\n`);
318
373
  } catch (error) {
319
- this.logger.warn("Failed to write to local cache", { logPath, error });
374
+ this.logger.warn("Failed to write to local cache", {
375
+ taskId: session.context.taskId,
376
+ runId: session.context.runId,
377
+ logPath,
378
+ error,
379
+ });
320
380
  }
321
381
  }
322
382
  }
@@ -5,6 +5,7 @@ type AnyHttpResponse = Response | ReturnType<typeof HttpResponse.json>;
5
5
  export interface PostHogHandlersOptions {
6
6
  baseUrl?: string;
7
7
  onAppendLog?: (entries: unknown[]) => void;
8
+ getTask?: () => unknown;
8
9
  getTaskRun?: () => unknown;
9
10
  appendLogResponse?: () => AnyHttpResponse;
10
11
  }
@@ -13,6 +14,7 @@ export function createPostHogHandlers(options: PostHogHandlersOptions = {}) {
13
14
  const {
14
15
  baseUrl = "http://localhost:8000",
15
16
  onAppendLog,
17
+ getTask,
16
18
  getTaskRun,
17
19
  appendLogResponse,
18
20
  } = options;
@@ -33,13 +35,35 @@ export function createPostHogHandlers(options: PostHogHandlersOptions = {}) {
33
35
  },
34
36
  ),
35
37
 
38
+ // GET /tasks/:taskId - Fetch task details
39
+ http.get(`${baseUrl}/api/projects/:projectId/tasks/:taskId/`, () => {
40
+ const task = getTask?.() ?? {
41
+ id: "test-task-id",
42
+ title: "Test task",
43
+ description: null,
44
+ origin_product: "user_created",
45
+ repository: "test/repo",
46
+ created_at: new Date().toISOString(),
47
+ updated_at: new Date().toISOString(),
48
+ };
49
+ return HttpResponse.json(task);
50
+ }),
51
+
36
52
  // GET /runs/:runId - Fetch task run details
37
53
  http.get(
38
- `${baseUrl}/api/projects/:projectId/tasks/:taskId/runs/:runId`,
54
+ `${baseUrl}/api/projects/:projectId/tasks/:taskId/runs/:runId/`,
39
55
  () => {
40
56
  const taskRun = getTaskRun?.() ?? { log_url: "" };
41
57
  return HttpResponse.json(taskRun);
42
58
  },
43
59
  ),
60
+
61
+ // PATCH /runs/:runId - Update task run
62
+ http.patch(
63
+ `${baseUrl}/api/projects/:projectId/tasks/:taskId/runs/:runId/`,
64
+ () => {
65
+ return HttpResponse.json({});
66
+ },
67
+ ),
44
68
  ];
45
69
  }