@posthog/agent 2.3.548 → 2.3.616
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +105 -31
- package/dist/agent.js.map +1 -1
- package/dist/handoff-checkpoint.js +142 -117
- package/dist/handoff-checkpoint.js.map +1 -1
- package/dist/posthog-api.js +1 -1
- package/dist/posthog-api.js.map +1 -1
- package/dist/server/agent-server.d.ts +2 -1
- package/dist/server/agent-server.js +155 -68
- package/dist/server/agent-server.js.map +1 -1
- package/dist/server/bin.cjs +166 -80
- package/dist/server/bin.cjs.map +1 -1
- package/package.json +3 -3
- package/src/adapters/claude/conversion/sdk-to-acp.ts +1 -26
- package/src/adapters/claude/session/options.ts +8 -0
- package/src/adapters/codex/codex-agent.test.ts +83 -0
- package/src/adapters/codex/codex-agent.ts +16 -0
- package/src/adapters/error-classification.ts +30 -0
- package/src/server/agent-server.ts +17 -7
- package/src/server/question-relay.test.ts +67 -5
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@posthog/agent",
|
|
3
|
-
"version": "2.3.548",
|
|
3
|
+
"version": "2.3.616",
|
|
4
4
|
"repository": "https://github.com/PostHog/code",
|
|
5
5
|
"description": "TypeScript agent framework wrapping Claude Agent SDK with Git-based task execution for PostHog",
|
|
6
6
|
"exports": {
|
|
@@ -107,8 +107,8 @@
|
|
|
107
107
|
"typescript": "^5.5.0",
|
|
108
108
|
"vitest": "^2.1.8",
|
|
109
109
|
"@posthog/shared": "1.0.0",
|
|
110
|
-
"@posthog/
|
|
111
|
-
"@posthog/
|
|
110
|
+
"@posthog/enricher": "1.0.0",
|
|
111
|
+
"@posthog/git": "1.0.0"
|
|
112
112
|
},
|
|
113
113
|
"dependencies": {
|
|
114
114
|
"@agentclientprotocol/sdk": "0.19.0",
|
|
@@ -22,6 +22,7 @@ import { image, text } from "../../../utils/acp-content";
|
|
|
22
22
|
import { unreachable } from "../../../utils/common";
|
|
23
23
|
import type { Logger } from "../../../utils/logger";
|
|
24
24
|
import { tryParsePartialJson } from "../../../utils/partial-json";
|
|
25
|
+
import { classifyAgentError } from "../../error-classification";
|
|
25
26
|
import { type EnrichedReadCache, registerHookCallback } from "../hooks";
|
|
26
27
|
import type {
|
|
27
28
|
Session,
|
|
@@ -696,32 +697,6 @@ export type ResultMessageHandlerResult = {
|
|
|
696
697
|
};
|
|
697
698
|
};
|
|
698
699
|
|
|
699
|
-
export type AgentErrorClassification =
|
|
700
|
-
| "upstream_stream_terminated"
|
|
701
|
-
| "upstream_connection_error"
|
|
702
|
-
| "agent_error";
|
|
703
|
-
|
|
704
|
-
/**
|
|
705
|
-
* Classify an error string surfaced by the Claude CLI via `is_error: true`
|
|
706
|
-
* result messages. Transient upstream-stream terminations (e.g. the fetch body
|
|
707
|
-
* from the LLM gateway is torn down mid-stream) are retriable; most other
|
|
708
|
-
* errors are not.
|
|
709
|
-
*/
|
|
710
|
-
export function classifyAgentError(
|
|
711
|
-
result: string | undefined,
|
|
712
|
-
): AgentErrorClassification {
|
|
713
|
-
if (!result) return "agent_error";
|
|
714
|
-
const text = result.trim();
|
|
715
|
-
// Anthropic SDK surfaces an undici fetch abort as "API Error: terminated".
|
|
716
|
-
if (/API Error:\s*terminated\b/i.test(text)) {
|
|
717
|
-
return "upstream_stream_terminated";
|
|
718
|
-
}
|
|
719
|
-
if (/API Error:\s*Connection error\b/i.test(text)) {
|
|
720
|
-
return "upstream_connection_error";
|
|
721
|
-
}
|
|
722
|
-
return "agent_error";
|
|
723
|
-
}
|
|
724
|
-
|
|
725
700
|
export function handleResultMessage(
|
|
726
701
|
message: SDKResultMessage,
|
|
727
702
|
): ResultMessageHandlerResult {
|
|
@@ -102,6 +102,12 @@ function buildMcpServers(
|
|
|
102
102
|
}
|
|
103
103
|
|
|
104
104
|
function buildEnvironment(): Record<string, string> {
|
|
105
|
+
const bedrockFallbackHeader = "x-posthog-use-bedrock-fallback: true";
|
|
106
|
+
const existingCustomHeaders = process.env.ANTHROPIC_CUSTOM_HEADERS;
|
|
107
|
+
const customHeaders = existingCustomHeaders
|
|
108
|
+
? `${existingCustomHeaders}\n${bedrockFallbackHeader}`
|
|
109
|
+
: bedrockFallbackHeader;
|
|
110
|
+
|
|
105
111
|
return {
|
|
106
112
|
...process.env,
|
|
107
113
|
ELECTRON_RUN_AS_NODE: "1",
|
|
@@ -110,6 +116,8 @@ function buildEnvironment(): Record<string, string> {
|
|
|
110
116
|
ENABLE_TOOL_SEARCH: "auto:0",
|
|
111
117
|
// Enable idle state as end-of-turn signal (required for SDK 0.2.114+)
|
|
112
118
|
CLAUDE_CODE_EMIT_SESSION_STATE_EVENTS: "1",
|
|
119
|
+
// Route to AWS Bedrock as a fallback when Anthropic returns 5xx
|
|
120
|
+
ANTHROPIC_CUSTOM_HEADERS: customHeaders,
|
|
113
121
|
};
|
|
114
122
|
}
|
|
115
123
|
|
|
@@ -306,6 +306,89 @@ describe("CodexAcpAgent", () => {
|
|
|
306
306
|
).resolves.toEqual({ stopReason: "end_turn" });
|
|
307
307
|
});
|
|
308
308
|
|
|
309
|
+
it.each([
|
|
310
|
+
["API Error: 429 rate_limit_error", "upstream_provider_failure"],
|
|
311
|
+
["API Error: 503 internal_error", "upstream_provider_failure"],
|
|
312
|
+
["API Error: 529 overloaded_error", "upstream_provider_failure"],
|
|
313
|
+
["ordinary failure", undefined],
|
|
314
|
+
] as const)(
|
|
315
|
+
"handles prompt failure %p",
|
|
316
|
+
async (message, expectedClassification) => {
|
|
317
|
+
const { agent } = createAgent();
|
|
318
|
+
mockCodexConnection.newSession.mockResolvedValue({
|
|
319
|
+
sessionId: "session-1",
|
|
320
|
+
modes: { currentModeId: "auto", availableModes: [] },
|
|
321
|
+
configOptions: [],
|
|
322
|
+
} satisfies Partial<NewSessionResponse>);
|
|
323
|
+
await agent.newSession({
|
|
324
|
+
cwd: process.cwd(),
|
|
325
|
+
} as never);
|
|
326
|
+
|
|
327
|
+
const promptError = new Error(message);
|
|
328
|
+
mockCodexConnection.prompt.mockRejectedValueOnce(promptError);
|
|
329
|
+
|
|
330
|
+
let thrown: unknown;
|
|
331
|
+
try {
|
|
332
|
+
await agent.prompt({
|
|
333
|
+
sessionId: "session-1",
|
|
334
|
+
prompt: [{ type: "text", text: "A" }],
|
|
335
|
+
} as never);
|
|
336
|
+
} catch (error) {
|
|
337
|
+
thrown = error;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
if (!expectedClassification) {
|
|
341
|
+
expect(thrown).toBe(promptError);
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
expect(thrown).toMatchObject({
|
|
346
|
+
data: {
|
|
347
|
+
classification: expectedClassification,
|
|
348
|
+
result: message,
|
|
349
|
+
},
|
|
350
|
+
});
|
|
351
|
+
},
|
|
352
|
+
);
|
|
353
|
+
|
|
354
|
+
it("does not let a classified failing prompt block subsequent prompts", async () => {
|
|
355
|
+
const { agent } = createAgent();
|
|
356
|
+
mockCodexConnection.newSession.mockResolvedValue({
|
|
357
|
+
sessionId: "session-1",
|
|
358
|
+
modes: { currentModeId: "auto", availableModes: [] },
|
|
359
|
+
configOptions: [],
|
|
360
|
+
} satisfies Partial<NewSessionResponse>);
|
|
361
|
+
await agent.newSession({
|
|
362
|
+
cwd: process.cwd(),
|
|
363
|
+
} as never);
|
|
364
|
+
|
|
365
|
+
mockCodexConnection.prompt.mockRejectedValueOnce(
|
|
366
|
+
new Error("API Error: 529 overloaded_error"),
|
|
367
|
+
);
|
|
368
|
+
mockCodexConnection.prompt.mockResolvedValueOnce({
|
|
369
|
+
stopReason: "end_turn",
|
|
370
|
+
});
|
|
371
|
+
|
|
372
|
+
await expect(
|
|
373
|
+
agent.prompt({
|
|
374
|
+
sessionId: "session-1",
|
|
375
|
+
prompt: [{ type: "text", text: "A" }],
|
|
376
|
+
} as never),
|
|
377
|
+
).rejects.toMatchObject({
|
|
378
|
+
data: {
|
|
379
|
+
classification: "upstream_provider_failure",
|
|
380
|
+
result: "API Error: 529 overloaded_error",
|
|
381
|
+
},
|
|
382
|
+
});
|
|
383
|
+
|
|
384
|
+
await expect(
|
|
385
|
+
agent.prompt({
|
|
386
|
+
sessionId: "session-1",
|
|
387
|
+
prompt: [{ type: "text", text: "B" }],
|
|
388
|
+
} as never),
|
|
389
|
+
).resolves.toEqual({ stopReason: "end_turn" });
|
|
390
|
+
});
|
|
391
|
+
|
|
309
392
|
describe("structured output injection", () => {
|
|
310
393
|
const schema = {
|
|
311
394
|
type: "object",
|
|
@@ -62,6 +62,7 @@ import {
|
|
|
62
62
|
nodeWritableToWebWritable,
|
|
63
63
|
} from "../../utils/streams";
|
|
64
64
|
import { BaseAcpAgent, type BaseSession } from "../base-acp-agent";
|
|
65
|
+
import { classifyAgentError } from "../error-classification";
|
|
65
66
|
import { createCodexClient } from "./codex-client";
|
|
66
67
|
import { normalizeCodexConfigOptions } from "./models";
|
|
67
68
|
import {
|
|
@@ -138,6 +139,19 @@ function prependPrContext(params: PromptRequest): PromptRequest {
|
|
|
138
139
|
};
|
|
139
140
|
}
|
|
140
141
|
|
|
142
|
+
/**
 * Re-shapes a failure from the Codex prompt call so that recognised upstream
 * failures carry a machine-readable classification.
 *
 * The error's message is run through `classifyAgentError`. When the result is
 * the generic `"agent_error"`, the original value is returned untouched so
 * callers still see the exact object that was thrown. Otherwise a
 * `RequestError.internalError` is returned whose data holds
 * `{ classification, result }`, where `result` is the extracted message.
 *
 * @param error - Whatever the prompt call rejected with.
 * @returns The original `error`, or a classified `RequestError`.
 */
function classifyPromptError(error: unknown): unknown {
  const message = extractPromptErrorMessage(error);
  const classification = classifyAgentError(message);
  if (classification === "agent_error") {
    return error;
  }

  return RequestError.internalError(
    { classification, result: message },
    message,
  );
}

/**
 * Pulls a human-readable message out of an arbitrary rejection value.
 *
 * `String(value)` on a plain object yields "[object Object]", which would hide
 * an upstream failure text from the classifier, so a string `message`
 * property is preferred when present.
 * NOTE(review): presumably the connection can reject with JSON-RPC style
 * `{ code, message }` payloads rather than `Error` instances — confirm
 * against the connection implementation.
 */
function extractPromptErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === "object" && error !== null && "message" in error) {
    const candidate = error.message;
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  // Same fallback as before for primitives, null/undefined and message-less objects.
  return String(error ?? "");
}
|
|
154
|
+
|
|
141
155
|
const CODEX_NATIVE_MODE: Record<CodeExecutionMode, CodexNativeMode> = {
|
|
142
156
|
auto: "auto",
|
|
143
157
|
default: "auto",
|
|
@@ -577,6 +591,8 @@ export class CodexAcpAgent extends BaseAcpAgent {
|
|
|
577
591
|
let response: PromptResponse;
|
|
578
592
|
try {
|
|
579
593
|
response = await this.codexConnection.prompt(prependPrContext(params));
|
|
594
|
+
} catch (error) {
|
|
595
|
+
throw classifyPromptError(error);
|
|
580
596
|
} finally {
|
|
581
597
|
this.session.promptRunning = false;
|
|
582
598
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/** Labels that `classifyAgentError` can assign to an adapter error string. */
export type AgentErrorClassification =
  | "upstream_stream_terminated"
  | "upstream_connection_error"
  | "upstream_provider_failure"
  | "agent_error";

// Anthropic SDK surfaces an undici fetch abort as "API Error: terminated".
const UPSTREAM_STREAM_TERMINATED_PATTERN = /API Error:\s*terminated\b/i;
const UPSTREAM_CONNECTION_ERROR_PATTERN = /API Error:\s*Connection error\b/i;
// HTTP 429 or any three-digit 5xx status directly after the "API Error:" prefix.
const UPSTREAM_PROVIDER_ERROR_STATUS_PATTERN = /API Error:\s*(?:429|5\d\d)\b/i;

/**
 * Rules are evaluated top to bottom and the first match wins, so the order
 * here is the precedence between classifications.
 */
const UPSTREAM_CLASSIFICATION_RULES: ReadonlyArray<
  readonly [RegExp, AgentErrorClassification]
> = [
  [UPSTREAM_STREAM_TERMINATED_PATTERN, "upstream_stream_terminated"],
  [UPSTREAM_CONNECTION_ERROR_PATTERN, "upstream_connection_error"],
  [UPSTREAM_PROVIDER_ERROR_STATUS_PATTERN, "upstream_provider_failure"],
];

/**
 * Classify error strings surfaced by agent adapters. Transient upstream
 * failures are retriable when they match exact stream/connection patterns or
 * retryable provider HTTP statuses; most other errors are not.
 *
 * @param result - Error text to inspect; surrounding whitespace is ignored.
 * @returns The first matching upstream classification, or `"agent_error"`
 *   when `result` is empty/undefined or matches no pattern.
 */
export function classifyAgentError(
  result: string | undefined,
): AgentErrorClassification {
  if (!result) return "agent_error";
  const text = result.trim();
  for (const [pattern, classification] of UPSTREAM_CLASSIFICATION_RULES) {
    // The patterns carry no `g`/`y` flag, so `test` is stateless across calls.
    if (pattern.test(text)) {
      return classification;
    }
  }
  return "agent_error";
}
|
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
import {
|
|
25
25
|
type AgentErrorClassification,
|
|
26
26
|
classifyAgentError,
|
|
27
|
-
} from "../adapters/claude/conversion/sdk-to-acp";
|
|
27
|
+
} from "../adapters/error-classification";
|
|
28
28
|
import type { PermissionMode } from "../execution-mode";
|
|
29
29
|
import { DEFAULT_CODEX_MODEL } from "../gateway-models";
|
|
30
30
|
import { HandoffCheckpointTracker } from "../handoff-checkpoint";
|
|
@@ -65,9 +65,20 @@ import type { AgentServerConfig } from "./types";
|
|
|
65
65
|
const agentErrorClassificationSchema = z.enum([
|
|
66
66
|
"upstream_stream_terminated",
|
|
67
67
|
"upstream_connection_error",
|
|
68
|
+
"upstream_provider_failure",
|
|
68
69
|
"agent_error",
|
|
69
70
|
]) satisfies z.ZodType<AgentErrorClassification>;
|
|
70
71
|
|
|
72
|
+
export const UPSTREAM_PROVIDER_FAILURE_MESSAGE =
|
|
73
|
+
"The upstream AI provider failed to process the request. Please retry the task in a few minutes.";
|
|
74
|
+
|
|
75
|
+
const upstreamProviderFailureClassifications =
|
|
76
|
+
new Set<AgentErrorClassification>([
|
|
77
|
+
"upstream_stream_terminated",
|
|
78
|
+
"upstream_connection_error",
|
|
79
|
+
"upstream_provider_failure",
|
|
80
|
+
]);
|
|
81
|
+
|
|
71
82
|
const errorWithClassificationSchema = z.object({
|
|
72
83
|
data: z.object({ classification: agentErrorClassificationSchema }),
|
|
73
84
|
});
|
|
@@ -1051,12 +1062,11 @@ export class AgentServer {
|
|
|
1051
1062
|
error: unknown,
|
|
1052
1063
|
): Promise<void> {
|
|
1053
1064
|
const { classification, message } = this.extractErrorClassification(error);
|
|
1054
|
-
const errorMessage =
|
|
1055
|
-
classification
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
: message || "Agent error";
|
|
1065
|
+
const errorMessage = upstreamProviderFailureClassifications.has(
|
|
1066
|
+
classification,
|
|
1067
|
+
)
|
|
1068
|
+
? UPSTREAM_PROVIDER_FAILURE_MESSAGE
|
|
1069
|
+
: message || "Agent error";
|
|
1060
1070
|
this.logger.error(`send_${phase}_task_message_failed`, {
|
|
1061
1071
|
classification,
|
|
1062
1072
|
message,
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import { type SetupServerApi, setupServer } from "msw/node";
|
|
2
2
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
3
|
-
import { classifyAgentError } from "../adapters/claude/conversion/sdk-to-acp";
|
|
3
|
+
import { classifyAgentError } from "../adapters/error-classification";
|
|
4
4
|
import type { PostHogAPIClient } from "../posthog-api";
|
|
5
5
|
import { createTestRepo, type TestRepo } from "../test/fixtures/api";
|
|
6
6
|
import { createPostHogHandlers } from "../test/mocks/msw-handlers";
|
|
7
7
|
import type { Task, TaskRun } from "../types";
|
|
8
|
-
import { AgentServer } from "./agent-server";
|
|
8
|
+
import { AgentServer, UPSTREAM_PROVIDER_FAILURE_MESSAGE } from "./agent-server";
|
|
9
9
|
|
|
10
10
|
interface TestableAgentServer {
|
|
11
11
|
posthogAPI: PostHogAPIClient;
|
|
@@ -76,10 +76,28 @@ function createTransientConnectionError(): Error & {
|
|
|
76
76
|
return error;
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
+
/**
 * Test fixture: an `Error` whose message is a sample "API Error: 529"
 * overloaded payload and whose `data` marks it as an
 * `"upstream_provider_failure"`, with `data.result` equal to the message.
 *
 * @returns A fresh error instance on every call.
 */
function createUpstreamProviderFailureError(): Error & {
  data: { classification: string; result: string };
} {
  const result =
    'API Error: 529 {"error":{"message":"{\\"type\\":\\"error\\",\\"error\\":{\\"type\\":\\"overloaded_error\\",\\"message\\":\\"Overloaded\\"}}","type":"api_error"}}';
  // Object.assign attaches `data` and yields the intersection type directly,
  // so no type assertion is needed.
  return Object.assign(new Error(result), {
    data: {
      classification: "upstream_provider_failure",
      result,
    },
  });
}
|
|
93
|
+
|
|
79
94
|
describe("Question relay", () => {
|
|
80
95
|
it.each([
|
|
81
96
|
["API Error: terminated", "upstream_stream_terminated"],
|
|
82
97
|
["API Error: Connection error", "upstream_connection_error"],
|
|
98
|
+
["API Error: 429 rate_limit_error", "upstream_provider_failure"],
|
|
99
|
+
["API Error: 529 overloaded_error", "upstream_provider_failure"],
|
|
100
|
+
["API Error: 503 internal_error", "upstream_provider_failure"],
|
|
83
101
|
["something else", "agent_error"],
|
|
84
102
|
[undefined, "agent_error"],
|
|
85
103
|
])("classifies %p as %s", (message, expected) => {
|
|
@@ -590,12 +608,56 @@ describe("Question relay", () => {
|
|
|
590
608
|
"test-run-id",
|
|
591
609
|
{
|
|
592
610
|
status: "failed",
|
|
593
|
-
error_message:
|
|
611
|
+
error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
|
|
612
|
+
},
|
|
613
|
+
);
|
|
614
|
+
});
|
|
615
|
+
|
|
616
|
+
it("surfaces upstream provider failures with a retryable message", async () => {
|
|
617
|
+
vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
|
|
618
|
+
id: "test-task-id",
|
|
619
|
+
title: "t",
|
|
620
|
+
description: "original task description",
|
|
621
|
+
} as unknown as Task);
|
|
622
|
+
vi.spyOn(server.posthogAPI, "getTaskRun").mockResolvedValue({
|
|
623
|
+
id: "test-run-id",
|
|
624
|
+
task: "test-task-id",
|
|
625
|
+
state: {},
|
|
626
|
+
} as unknown as TaskRun);
|
|
627
|
+
|
|
628
|
+
const promptSpy = vi
|
|
629
|
+
.fn()
|
|
630
|
+
.mockRejectedValueOnce(createUpstreamProviderFailureError());
|
|
631
|
+
const updateTaskRunSpy = vi
|
|
632
|
+
.spyOn(server.posthogAPI, "updateTaskRun")
|
|
633
|
+
.mockResolvedValue({} as TaskRun);
|
|
634
|
+
server.session = {
|
|
635
|
+
payload: TEST_PAYLOAD,
|
|
636
|
+
acpSessionId: "acp-session",
|
|
637
|
+
clientConnection: { prompt: promptSpy },
|
|
638
|
+
logWriter: {
|
|
639
|
+
flushAll: vi.fn().mockResolvedValue(undefined),
|
|
640
|
+
getFullAgentResponse: vi.fn().mockReturnValue(null),
|
|
641
|
+
resetTurnMessages: vi.fn(),
|
|
642
|
+
flush: vi.fn().mockResolvedValue(undefined),
|
|
643
|
+
isRegistered: vi.fn().mockReturnValue(true),
|
|
644
|
+
},
|
|
645
|
+
};
|
|
646
|
+
|
|
647
|
+
await server.sendInitialTaskMessage(TEST_PAYLOAD);
|
|
648
|
+
|
|
649
|
+
expect(promptSpy).toHaveBeenCalledTimes(1);
|
|
650
|
+
expect(updateTaskRunSpy).toHaveBeenCalledWith(
|
|
651
|
+
"test-task-id",
|
|
652
|
+
"test-run-id",
|
|
653
|
+
{
|
|
654
|
+
status: "failed",
|
|
655
|
+
error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
|
|
594
656
|
},
|
|
595
657
|
);
|
|
596
658
|
});
|
|
597
659
|
|
|
598
|
-
it("surfaces upstream connection errors with the
|
|
660
|
+
it("surfaces upstream connection errors with the shared provider failure message", async () => {
|
|
599
661
|
vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
|
|
600
662
|
id: "test-task-id",
|
|
601
663
|
title: "t",
|
|
@@ -634,7 +696,7 @@ describe("Question relay", () => {
|
|
|
634
696
|
"test-run-id",
|
|
635
697
|
{
|
|
636
698
|
status: "failed",
|
|
637
|
-
error_message:
|
|
699
|
+
error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
|
|
638
700
|
},
|
|
639
701
|
);
|
|
640
702
|
});
|