@posthog/agent 2.3.556 → 2.3.616

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@posthog/agent",
3
- "version": "2.3.556",
3
+ "version": "2.3.616",
4
4
  "repository": "https://github.com/PostHog/code",
5
5
  "description": "TypeScript agent framework wrapping Claude Agent SDK with Git-based task execution for PostHog",
6
6
  "exports": {
@@ -107,8 +107,8 @@
107
107
  "typescript": "^5.5.0",
108
108
  "vitest": "^2.1.8",
109
109
  "@posthog/shared": "1.0.0",
110
- "@posthog/git": "1.0.0",
111
- "@posthog/enricher": "1.0.0"
110
+ "@posthog/enricher": "1.0.0",
111
+ "@posthog/git": "1.0.0"
112
112
  },
113
113
  "dependencies": {
114
114
  "@agentclientprotocol/sdk": "0.19.0",
@@ -22,6 +22,7 @@ import { image, text } from "../../../utils/acp-content";
22
22
  import { unreachable } from "../../../utils/common";
23
23
  import type { Logger } from "../../../utils/logger";
24
24
  import { tryParsePartialJson } from "../../../utils/partial-json";
25
+ import { classifyAgentError } from "../../error-classification";
25
26
  import { type EnrichedReadCache, registerHookCallback } from "../hooks";
26
27
  import type {
27
28
  Session,
@@ -696,32 +697,6 @@ export type ResultMessageHandlerResult = {
696
697
  };
697
698
  };
698
699
 
699
- export type AgentErrorClassification =
700
- | "upstream_stream_terminated"
701
- | "upstream_connection_error"
702
- | "agent_error";
703
-
704
- /**
705
- * Classify an error string surfaced by the Claude CLI via `is_error: true`
706
- * result messages. Transient upstream-stream terminations (e.g. the fetch body
707
- * from the LLM gateway is torn down mid-stream) are retriable; most other
708
- * errors are not.
709
- */
710
- export function classifyAgentError(
711
- result: string | undefined,
712
- ): AgentErrorClassification {
713
- if (!result) return "agent_error";
714
- const text = result.trim();
715
- // Anthropic SDK surfaces an undici fetch abort as "API Error: terminated".
716
- if (/API Error:\s*terminated\b/i.test(text)) {
717
- return "upstream_stream_terminated";
718
- }
719
- if (/API Error:\s*Connection error\b/i.test(text)) {
720
- return "upstream_connection_error";
721
- }
722
- return "agent_error";
723
- }
724
-
725
700
  export function handleResultMessage(
726
701
  message: SDKResultMessage,
727
702
  ): ResultMessageHandlerResult {
@@ -306,6 +306,89 @@ describe("CodexAcpAgent", () => {
306
306
  ).resolves.toEqual({ stopReason: "end_turn" });
307
307
  });
308
308
 
309
+ it.each([
310
+ ["API Error: 429 rate_limit_error", "upstream_provider_failure"],
311
+ ["API Error: 503 internal_error", "upstream_provider_failure"],
312
+ ["API Error: 529 overloaded_error", "upstream_provider_failure"],
313
+ ["ordinary failure", undefined],
314
+ ] as const)(
315
+ "handles prompt failure %p",
316
+ async (message, expectedClassification) => {
317
+ const { agent } = createAgent();
318
+ mockCodexConnection.newSession.mockResolvedValue({
319
+ sessionId: "session-1",
320
+ modes: { currentModeId: "auto", availableModes: [] },
321
+ configOptions: [],
322
+ } satisfies Partial<NewSessionResponse>);
323
+ await agent.newSession({
324
+ cwd: process.cwd(),
325
+ } as never);
326
+
327
+ const promptError = new Error(message);
328
+ mockCodexConnection.prompt.mockRejectedValueOnce(promptError);
329
+
330
+ let thrown: unknown;
331
+ try {
332
+ await agent.prompt({
333
+ sessionId: "session-1",
334
+ prompt: [{ type: "text", text: "A" }],
335
+ } as never);
336
+ } catch (error) {
337
+ thrown = error;
338
+ }
339
+
340
+ if (!expectedClassification) {
341
+ expect(thrown).toBe(promptError);
342
+ return;
343
+ }
344
+
345
+ expect(thrown).toMatchObject({
346
+ data: {
347
+ classification: expectedClassification,
348
+ result: message,
349
+ },
350
+ });
351
+ },
352
+ );
353
+
354
+ it("does not let a classified failing prompt block subsequent prompts", async () => {
355
+ const { agent } = createAgent();
356
+ mockCodexConnection.newSession.mockResolvedValue({
357
+ sessionId: "session-1",
358
+ modes: { currentModeId: "auto", availableModes: [] },
359
+ configOptions: [],
360
+ } satisfies Partial<NewSessionResponse>);
361
+ await agent.newSession({
362
+ cwd: process.cwd(),
363
+ } as never);
364
+
365
+ mockCodexConnection.prompt.mockRejectedValueOnce(
366
+ new Error("API Error: 529 overloaded_error"),
367
+ );
368
+ mockCodexConnection.prompt.mockResolvedValueOnce({
369
+ stopReason: "end_turn",
370
+ });
371
+
372
+ await expect(
373
+ agent.prompt({
374
+ sessionId: "session-1",
375
+ prompt: [{ type: "text", text: "A" }],
376
+ } as never),
377
+ ).rejects.toMatchObject({
378
+ data: {
379
+ classification: "upstream_provider_failure",
380
+ result: "API Error: 529 overloaded_error",
381
+ },
382
+ });
383
+
384
+ await expect(
385
+ agent.prompt({
386
+ sessionId: "session-1",
387
+ prompt: [{ type: "text", text: "B" }],
388
+ } as never),
389
+ ).resolves.toEqual({ stopReason: "end_turn" });
390
+ });
391
+
309
392
  describe("structured output injection", () => {
310
393
  const schema = {
311
394
  type: "object",
@@ -62,6 +62,7 @@ import {
62
62
  nodeWritableToWebWritable,
63
63
  } from "../../utils/streams";
64
64
  import { BaseAcpAgent, type BaseSession } from "../base-acp-agent";
65
+ import { classifyAgentError } from "../error-classification";
65
66
  import { createCodexClient } from "./codex-client";
66
67
  import { normalizeCodexConfigOptions } from "./models";
67
68
  import {
@@ -138,6 +139,19 @@ function prependPrContext(params: PromptRequest): PromptRequest {
138
139
  };
139
140
  }
140
141
 
142
+ function classifyPromptError(error: unknown): unknown {
143
+ const message = error instanceof Error ? error.message : String(error ?? "");
144
+ const classification = classifyAgentError(message);
145
+ if (classification === "agent_error") {
146
+ return error;
147
+ }
148
+
149
+ return RequestError.internalError(
150
+ { classification, result: message },
151
+ message,
152
+ );
153
+ }
154
+
141
155
  const CODEX_NATIVE_MODE: Record<CodeExecutionMode, CodexNativeMode> = {
142
156
  auto: "auto",
143
157
  default: "auto",
@@ -577,6 +591,8 @@ export class CodexAcpAgent extends BaseAcpAgent {
577
591
  let response: PromptResponse;
578
592
  try {
579
593
  response = await this.codexConnection.prompt(prependPrContext(params));
594
+ } catch (error) {
595
+ throw classifyPromptError(error);
580
596
  } finally {
581
597
  this.session.promptRunning = false;
582
598
  }
@@ -0,0 +1,30 @@
1
+ export type AgentErrorClassification =
2
+ | "upstream_stream_terminated"
3
+ | "upstream_connection_error"
4
+ | "upstream_provider_failure"
5
+ | "agent_error";
6
+
7
+ const UPSTREAM_PROVIDER_ERROR_STATUS_PATTERN = /API Error:\s*(?:429|5\d\d)\b/i;
8
+
9
+ /**
10
+ * Classify error strings surfaced by agent adapters. Transient upstream
11
+ * failures are retriable when they match exact stream/connection patterns or
12
+ * retryable provider HTTP statuses; most other errors are not.
13
+ */
14
+ export function classifyAgentError(
15
+ result: string | undefined,
16
+ ): AgentErrorClassification {
17
+ if (!result) return "agent_error";
18
+ const text = result.trim();
19
+ // Anthropic SDK surfaces an undici fetch abort as "API Error: terminated".
20
+ if (/API Error:\s*terminated\b/i.test(text)) {
21
+ return "upstream_stream_terminated";
22
+ }
23
+ if (/API Error:\s*Connection error\b/i.test(text)) {
24
+ return "upstream_connection_error";
25
+ }
26
+ if (UPSTREAM_PROVIDER_ERROR_STATUS_PATTERN.test(text)) {
27
+ return "upstream_provider_failure";
28
+ }
29
+ return "agent_error";
30
+ }
@@ -24,7 +24,7 @@ import {
24
24
  import {
25
25
  type AgentErrorClassification,
26
26
  classifyAgentError,
27
- } from "../adapters/claude/conversion/sdk-to-acp";
27
+ } from "../adapters/error-classification";
28
28
  import type { PermissionMode } from "../execution-mode";
29
29
  import { DEFAULT_CODEX_MODEL } from "../gateway-models";
30
30
  import { HandoffCheckpointTracker } from "../handoff-checkpoint";
@@ -65,9 +65,20 @@ import type { AgentServerConfig } from "./types";
65
65
  const agentErrorClassificationSchema = z.enum([
66
66
  "upstream_stream_terminated",
67
67
  "upstream_connection_error",
68
+ "upstream_provider_failure",
68
69
  "agent_error",
69
70
  ]) satisfies z.ZodType<AgentErrorClassification>;
70
71
 
72
+ export const UPSTREAM_PROVIDER_FAILURE_MESSAGE =
73
+ "The upstream AI provider failed to process the request. Please retry the task in a few minutes.";
74
+
75
+ const upstreamProviderFailureClassifications =
76
+ new Set<AgentErrorClassification>([
77
+ "upstream_stream_terminated",
78
+ "upstream_connection_error",
79
+ "upstream_provider_failure",
80
+ ]);
81
+
71
82
  const errorWithClassificationSchema = z.object({
72
83
  data: z.object({ classification: agentErrorClassificationSchema }),
73
84
  });
@@ -1051,12 +1062,11 @@ export class AgentServer {
1051
1062
  error: unknown,
1052
1063
  ): Promise<void> {
1053
1064
  const { classification, message } = this.extractErrorClassification(error);
1054
- const errorMessage =
1055
- classification === "upstream_stream_terminated"
1056
- ? "Upstream LLM stream terminated"
1057
- : classification === "upstream_connection_error"
1058
- ? "Upstream LLM connection error"
1059
- : message || "Agent error";
1065
+ const errorMessage = upstreamProviderFailureClassifications.has(
1066
+ classification,
1067
+ )
1068
+ ? UPSTREAM_PROVIDER_FAILURE_MESSAGE
1069
+ : message || "Agent error";
1060
1070
  this.logger.error(`send_${phase}_task_message_failed`, {
1061
1071
  classification,
1062
1072
  message,
@@ -1,11 +1,11 @@
1
1
  import { type SetupServerApi, setupServer } from "msw/node";
2
2
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
3
- import { classifyAgentError } from "../adapters/claude/conversion/sdk-to-acp";
3
+ import { classifyAgentError } from "../adapters/error-classification";
4
4
  import type { PostHogAPIClient } from "../posthog-api";
5
5
  import { createTestRepo, type TestRepo } from "../test/fixtures/api";
6
6
  import { createPostHogHandlers } from "../test/mocks/msw-handlers";
7
7
  import type { Task, TaskRun } from "../types";
8
- import { AgentServer } from "./agent-server";
8
+ import { AgentServer, UPSTREAM_PROVIDER_FAILURE_MESSAGE } from "./agent-server";
9
9
 
10
10
  interface TestableAgentServer {
11
11
  posthogAPI: PostHogAPIClient;
@@ -76,10 +76,28 @@ function createTransientConnectionError(): Error & {
76
76
  return error;
77
77
  }
78
78
 
79
+ function createUpstreamProviderFailureError(): Error & {
80
+ data: { classification: string; result: string };
81
+ } {
82
+ const result =
83
+ 'API Error: 529 {"error":{"message":"{\\"type\\":\\"error\\",\\"error\\":{\\"type\\":\\"overloaded_error\\",\\"message\\":\\"Overloaded\\"}}","type":"api_error"}}';
84
+ const error = new Error(result) as Error & {
85
+ data: { classification: string; result: string };
86
+ };
87
+ error.data = {
88
+ classification: "upstream_provider_failure",
89
+ result,
90
+ };
91
+ return error;
92
+ }
93
+
79
94
  describe("Question relay", () => {
80
95
  it.each([
81
96
  ["API Error: terminated", "upstream_stream_terminated"],
82
97
  ["API Error: Connection error", "upstream_connection_error"],
98
+ ["API Error: 429 rate_limit_error", "upstream_provider_failure"],
99
+ ["API Error: 529 overloaded_error", "upstream_provider_failure"],
100
+ ["API Error: 503 internal_error", "upstream_provider_failure"],
83
101
  ["something else", "agent_error"],
84
102
  [undefined, "agent_error"],
85
103
  ])("classifies %p as %s", (message, expected) => {
@@ -590,12 +608,56 @@ describe("Question relay", () => {
590
608
  "test-run-id",
591
609
  {
592
610
  status: "failed",
593
- error_message: "Upstream LLM stream terminated",
611
+ error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
612
+ },
613
+ );
614
+ });
615
+
616
+ it("surfaces upstream provider failures with a retryable message", async () => {
617
+ vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
618
+ id: "test-task-id",
619
+ title: "t",
620
+ description: "original task description",
621
+ } as unknown as Task);
622
+ vi.spyOn(server.posthogAPI, "getTaskRun").mockResolvedValue({
623
+ id: "test-run-id",
624
+ task: "test-task-id",
625
+ state: {},
626
+ } as unknown as TaskRun);
627
+
628
+ const promptSpy = vi
629
+ .fn()
630
+ .mockRejectedValueOnce(createUpstreamProviderFailureError());
631
+ const updateTaskRunSpy = vi
632
+ .spyOn(server.posthogAPI, "updateTaskRun")
633
+ .mockResolvedValue({} as TaskRun);
634
+ server.session = {
635
+ payload: TEST_PAYLOAD,
636
+ acpSessionId: "acp-session",
637
+ clientConnection: { prompt: promptSpy },
638
+ logWriter: {
639
+ flushAll: vi.fn().mockResolvedValue(undefined),
640
+ getFullAgentResponse: vi.fn().mockReturnValue(null),
641
+ resetTurnMessages: vi.fn(),
642
+ flush: vi.fn().mockResolvedValue(undefined),
643
+ isRegistered: vi.fn().mockReturnValue(true),
644
+ },
645
+ };
646
+
647
+ await server.sendInitialTaskMessage(TEST_PAYLOAD);
648
+
649
+ expect(promptSpy).toHaveBeenCalledTimes(1);
650
+ expect(updateTaskRunSpy).toHaveBeenCalledWith(
651
+ "test-task-id",
652
+ "test-run-id",
653
+ {
654
+ status: "failed",
655
+ error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
594
656
  },
595
657
  );
596
658
  });
597
659
 
598
- it("surfaces upstream connection errors with the connection-specific message", async () => {
660
+ it("surfaces upstream connection errors with the shared provider failure message", async () => {
599
661
  vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
600
662
  id: "test-task-id",
601
663
  title: "t",
@@ -634,7 +696,7 @@ describe("Question relay", () => {
634
696
  "test-run-id",
635
697
  {
636
698
  status: "failed",
637
- error_message: "Upstream LLM connection error",
699
+ error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
638
700
  },
639
701
  );
640
702
  });