npm - @posthog/agent - Versions diffs - 2.3.556 → 2.3.619 - Mend

@posthog/agent 2.3.556 → 2.3.619

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Files changed (19)

package/dist/agent.js +64 -30
package/dist/agent.js.map +1 -1
package/dist/handoff-checkpoint.js +142 -117
package/dist/handoff-checkpoint.js.map +1 -1
package/dist/posthog-api.js +1 -1
package/dist/posthog-api.js.map +1 -1
package/dist/server/agent-server.d.ts +2 -1
package/dist/server/agent-server.js +125 -69
package/dist/server/agent-server.js.map +1 -1
package/dist/server/bin.cjs +136 -81
package/dist/server/bin.cjs.map +1 -1
package/package.json +3 -3
package/src/adapters/claude/conversion/sdk-to-acp.ts +1 -26
package/src/adapters/codex/codex-agent.test.ts +83 -0
package/src/adapters/codex/codex-agent.ts +16 -0
package/src/adapters/error-classification.ts +30 -0
package/src/server/agent-server.test.ts +17 -0
package/src/server/agent-server.ts +28 -9
package/src/server/question-relay.test.ts +67 -5

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@posthog/agent",
-  "version": "2.3.556",
+  "version": "2.3.619",
   "repository": "https://github.com/PostHog/code",
   "description": "TypeScript agent framework wrapping Claude Agent SDK with Git-based task execution for PostHog",
   "exports": {
@@ -107,8 +107,8 @@
     "typescript": "^5.5.0",
     "vitest": "^2.1.8",
     "@posthog/shared": "1.0.0",
-    "@posthog/git": "1.0.0",
-    "@posthog/enricher": "1.0.0"
+    "@posthog/enricher": "1.0.0",
+    "@posthog/git": "1.0.0"
   },
   "dependencies": {
     "@agentclientprotocol/sdk": "0.19.0",

package/src/adapters/claude/conversion/sdk-to-acp.ts CHANGED

@@ -22,6 +22,7 @@ import { image, text } from "../../../utils/acp-content";
 import { unreachable } from "../../../utils/common";
 import type { Logger } from "../../../utils/logger";
 import { tryParsePartialJson } from "../../../utils/partial-json";
+import { classifyAgentError } from "../../error-classification";
 import { type EnrichedReadCache, registerHookCallback } from "../hooks";
 import type {
   Session,
@@ -696,32 +697,6 @@ export type ResultMessageHandlerResult = {
   };
 };
-export type AgentErrorClassification =
-  | "upstream_stream_terminated"
-  | "upstream_connection_error"
-  | "agent_error";
-/**
- * Classify an error string surfaced by the Claude CLI via `is_error: true`
- * result messages. Transient upstream-stream terminations (e.g. the fetch body
- * from the LLM gateway is torn down mid-stream) are retriable; most other
- * errors are not.
- */
-export function classifyAgentError(
-  result: string | undefined,
-): AgentErrorClassification {
-  if (!result) return "agent_error";
-  const text = result.trim();
-  // Anthropic SDK surfaces an undici fetch abort as "API Error: terminated".
-  if (/API Error:\s*terminated\b/i.test(text)) {
-    return "upstream_stream_terminated";
-  }
-  if (/API Error:\s*Connection error\b/i.test(text)) {
-    return "upstream_connection_error";
-  }
-  return "agent_error";
-}
 export function handleResultMessage(
   message: SDKResultMessage,
 ): ResultMessageHandlerResult {

package/src/adapters/codex/codex-agent.test.ts CHANGED

@@ -306,6 +306,89 @@ describe("CodexAcpAgent", () => {
     ).resolves.toEqual({ stopReason: "end_turn" });
   });
+  it.each([
+    ["API Error: 429 rate_limit_error", "upstream_provider_failure"],
+    ["API Error: 503 internal_error", "upstream_provider_failure"],
+    ["API Error: 529 overloaded_error", "upstream_provider_failure"],
+    ["ordinary failure", undefined],
+  ] as const)(
+    "handles prompt failure %p",
+    async (message, expectedClassification) => {
+      const { agent } = createAgent();
+      mockCodexConnection.newSession.mockResolvedValue({
+        sessionId: "session-1",
+        modes: { currentModeId: "auto", availableModes: [] },
+        configOptions: [],
+      } satisfies Partial<NewSessionResponse>);
+      await agent.newSession({
+        cwd: process.cwd(),
+      } as never);
+      const promptError = new Error(message);
+      mockCodexConnection.prompt.mockRejectedValueOnce(promptError);
+      let thrown: unknown;
+      try {
+        await agent.prompt({
+          sessionId: "session-1",
+          prompt: [{ type: "text", text: "A" }],
+        } as never);
+      } catch (error) {
+        thrown = error;
+      }
+      if (!expectedClassification) {
+        expect(thrown).toBe(promptError);
+        return;
+      }
+      expect(thrown).toMatchObject({
+        data: {
+          classification: expectedClassification,
+          result: message,
+        },
+      });
+    },
+  );
+  it("does not let a classified failing prompt block subsequent prompts", async () => {
+    const { agent } = createAgent();
+    mockCodexConnection.newSession.mockResolvedValue({
+      sessionId: "session-1",
+      modes: { currentModeId: "auto", availableModes: [] },
+      configOptions: [],
+    } satisfies Partial<NewSessionResponse>);
+    await agent.newSession({
+      cwd: process.cwd(),
+    } as never);
+    mockCodexConnection.prompt.mockRejectedValueOnce(
+      new Error("API Error: 529 overloaded_error"),
+    );
+    mockCodexConnection.prompt.mockResolvedValueOnce({
+      stopReason: "end_turn",
+    });
+    await expect(
+      agent.prompt({
+        sessionId: "session-1",
+        prompt: [{ type: "text", text: "A" }],
+      } as never),
+    ).rejects.toMatchObject({
+      data: {
+        classification: "upstream_provider_failure",
+        result: "API Error: 529 overloaded_error",
+      },
+    });
+    await expect(
+      agent.prompt({
+        sessionId: "session-1",
+        prompt: [{ type: "text", text: "B" }],
+      } as never),
+    ).resolves.toEqual({ stopReason: "end_turn" });
+  });
   describe("structured output injection", () => {
     const schema = {
       type: "object",

package/src/adapters/codex/codex-agent.ts CHANGED

@@ -62,6 +62,7 @@ import {
   nodeWritableToWebWritable,
 } from "../../utils/streams";
 import { BaseAcpAgent, type BaseSession } from "../base-acp-agent";
+import { classifyAgentError } from "../error-classification";
 import { createCodexClient } from "./codex-client";
 import { normalizeCodexConfigOptions } from "./models";
 import {
@@ -138,6 +139,19 @@ function prependPrContext(params: PromptRequest): PromptRequest {
   };
 }
+function classifyPromptError(error: unknown): unknown {
+  const message = error instanceof Error ? error.message : String(error ?? "");
+  const classification = classifyAgentError(message);
+  if (classification === "agent_error") {
+    return error;
+  }
+  return RequestError.internalError(
+    { classification, result: message },
+    message,
+  );
+}
 const CODEX_NATIVE_MODE: Record<CodeExecutionMode, CodexNativeMode> = {
   auto: "auto",
   default: "auto",
@@ -577,6 +591,8 @@ export class CodexAcpAgent extends BaseAcpAgent {
     let response: PromptResponse;
     try {
       response = await this.codexConnection.prompt(prependPrContext(params));
+    } catch (error) {
+      throw classifyPromptError(error);
     } finally {
       this.session.promptRunning = false;
     }

package/src/adapters/error-classification.ts ADDED

@@ -0,0 +1,30 @@
+export type AgentErrorClassification =
+  | "upstream_stream_terminated"
+  | "upstream_connection_error"
+  | "upstream_provider_failure"
+  | "agent_error";
+const UPSTREAM_PROVIDER_ERROR_STATUS_PATTERN = /API Error:\s*(?:429|5\d\d)\b/i;
+/**
+ * Classify error strings surfaced by agent adapters. Transient upstream
+ * failures are retriable when they match exact stream/connection patterns or
+ * retryable provider HTTP statuses; most other errors are not.
+ */
+export function classifyAgentError(
+  result: string | undefined,
+): AgentErrorClassification {
+  if (!result) return "agent_error";
+  const text = result.trim();
+  // Anthropic SDK surfaces an undici fetch abort as "API Error: terminated".
+  if (/API Error:\s*terminated\b/i.test(text)) {
+    return "upstream_stream_terminated";
+  }
+  if (/API Error:\s*Connection error\b/i.test(text)) {
+    return "upstream_connection_error";
+  }
+  if (UPSTREAM_PROVIDER_ERROR_STATUS_PATTERN.test(text)) {
+    return "upstream_provider_failure";
+  }
+  return "agent_error";
+}

package/src/server/agent-server.test.ts CHANGED

@@ -826,6 +826,9 @@ describe("AgentServer HTTP Mode", () => {
           "If the user explicitly asks you to open or update a pull request",
           "open a draft pull request",
           "unless the user explicitly asks",
+          ".github/pull_request_template.md",
+          "gh issue list --search",
+          "Closes #<n>",
           "Generated-By: PostHog Code",
           "Task-Id: test-task-id",
         ],
@@ -868,6 +871,13 @@ describe("AgentServer HTTP Mode", () => {
       expect(prompt).toContain("Generated-By: PostHog Code");
       expect(prompt).toContain("Task-Id: test-task-id");
       expect(prompt).toContain("Created with [PostHog Code]");
+      // PR template detection (repo first, org `.github` fallback)
+      expect(prompt).toContain(".github/pull_request_template.md");
+      expect(prompt).toContain("org's `.github` repo");
+      // Related-issue linking
+      expect(prompt).toContain("gh issue list --state open --search");
+      expect(prompt).toContain("Closes #<n>");
+      expect(prompt).toContain("Refs #<n>");
       delete process.env.POSTHOG_CODE_INTERACTION_ORIGIN;
     });
@@ -895,6 +905,13 @@ describe("AgentServer HTTP Mode", () => {
       );
       expect(prompt).toContain("Push to the existing PR branch");
       expect(prompt).not.toContain("Create a draft pull request");
+      // Review-comment thread handling: reply + resolve
+      expect(prompt).toContain("review thread");
+      expect(prompt).toContain("/pulls/{n}/comments/{id}/replies");
+      expect(prompt).toContain("resolveReviewThread");
+      expect(prompt).toContain(
+        "Do NOT push fixes for review comments without replying to and resolving each related thread.",
+      );
       delete process.env.POSTHOG_CODE_INTERACTION_ORIGIN;
     });

package/src/server/agent-server.ts CHANGED

@@ -24,7 +24,7 @@ import {
 import {
   type AgentErrorClassification,
   classifyAgentError,
-} from "../adapters/claude/conversion/sdk-to-acp";
+} from "../adapters/error-classification";
 import type { PermissionMode } from "../execution-mode";
 import { DEFAULT_CODEX_MODEL } from "../gateway-models";
 import { HandoffCheckpointTracker } from "../handoff-checkpoint";
@@ -65,9 +65,20 @@ import type { AgentServerConfig } from "./types";
 const agentErrorClassificationSchema = z.enum([
   "upstream_stream_terminated",
   "upstream_connection_error",
+  "upstream_provider_failure",
   "agent_error",
 ]) satisfies z.ZodType<AgentErrorClassification>;
+export const UPSTREAM_PROVIDER_FAILURE_MESSAGE =
+  "The upstream AI provider failed to process the request. Please retry the task in a few minutes.";
+const upstreamProviderFailureClassifications =
+  new Set<AgentErrorClassification>([
+    "upstream_stream_terminated",
+    "upstream_connection_error",
+    "upstream_provider_failure",
+  ]);
 const errorWithClassificationSchema = z.object({
   data: z.object({ classification: agentErrorClassificationSchema }),
 });
@@ -1051,12 +1062,11 @@ export class AgentServer {
     error: unknown,
   ): Promise<void> {
     const { classification, message } = this.extractErrorClassification(error);
-    const errorMessage =
-      classification === "upstream_stream_terminated"
-        ? "Upstream LLM stream terminated"
-        : classification === "upstream_connection_error"
-          ? "Upstream LLM connection error"
-          : message || "Agent error";
+    const errorMessage = upstreamProviderFailureClassifications.has(
+      classification,
+    )
+      ? UPSTREAM_PROVIDER_FAILURE_MESSAGE
+      : message || "Agent error";
     this.logger.error(`send_${phase}_task_message_failed`, {
       classification,
       message,
@@ -1633,9 +1643,14 @@ After completing the requested changes:
 1. Check out the existing PR branch with \`gh pr checkout ${prUrl}\`
 2. Stage and commit all changes with a clear commit message
 3. Push to the existing PR branch
+4. For every PR review comment or review thread you addressed, treat the thread as done only after BOTH of these:
+   - Reply on the thread with a short note describing what changed (reference the commit SHA when useful) using \`gh api -X POST /repos/{owner}/{repo}/pulls/{n}/comments/{id}/replies -f body='...'\`.
+   - Resolve the thread via the \`resolveReviewThread\` GraphQL mutation: \`gh api graphql -f query='mutation($id:ID!){resolveReviewThread(input:{threadId:$id}){thread{isResolved}}}' -f id="<thread-node-id>"\`.
+   List unresolved threads first with \`gh api graphql -f query='{repository(owner:"<owner>",name:"<repo>"){pullRequest(number:<n>){reviewThreads(first:100){nodes{id isResolved comments(first:1){nodes{body}}}}}}}'\` so you can resolve each one you fixed.
 Important:
 - Do NOT create a new branch or a new pull request.
+- Do NOT push fixes for review comments without replying to and resolving each related thread.
 ${attributionInstructions}
 `;
     }
@@ -1651,7 +1666,7 @@ When the user asks for code changes:
 When the user explicitly asks to clone or work in a GitHub repository:
 - Clone the repository into /tmp/workspace/repos/<owner>/<repo> using \`gh repo clone <owner>/<repo> /tmp/workspace/repos/<owner>/<repo>\`
 - Work from inside that cloned repository for follow-up code changes
-- If the user explicitly asks you to open or update a pull request, create a branch, commit the requested changes, push it, and open a draft pull request from inside the clone
+- If the user explicitly asks you to open or update a pull request, create a branch, commit the requested changes, push it, and open a draft pull request from inside the clone. Before opening the PR, check the cloned repo for a PR template at \`.github/pull_request_template.md\` (or variants; fall back to the org's \`.github\` repo via \`gh api\`) and use it as the body structure, and search for matching open issues with \`gh issue list --search\` to include \`Closes #<n>\` / \`Refs #<n>\` links.
 - Do NOT create branches, commits, push changes, or open pull requests unless the user explicitly asks for that`;
       return `
@@ -1694,7 +1709,11 @@ After completing the requested changes:
 1. Create a new branch prefixed with \`posthog-code/\` (e.g. \`posthog-code/fix-login-redirect\`) based on the work done
 2. Stage and commit all changes with a clear commit message
 3. Push the branch to origin
-4. Create a draft pull request using \`gh pr create --draft${this.config.baseBranch ? ` --base ${this.config.baseBranch}` : ""}\` with a descriptive title and body. Add the following footer at the end of the PR description:
+4. Before opening the PR, prepare the body:
+   - Check the repo for a PR template at \`.github/pull_request_template.md\` (also try \`.github/PULL_REQUEST_TEMPLATE.md\`, \`docs/pull_request_template.md\`, and root variants). If one exists, use its exact section headings as the PR body — do NOT fall back to a generic Summary/Test plan format.
+   - If no repo-level template exists, check the org's \`.github\` repo via \`gh api /repos/<owner>/.github/contents/.github/pull_request_template.md\` (and other common paths) and use that as a fallback.
+   - Search for matching open issues with \`gh issue list --state open --search '<keywords>'\` (derive keywords from the branch name, commits, and changed files; \`gh issue view <n>\` to confirm relevance). For every issue this PR would resolve, include a \`Closes #<n>\` line in the body so GitHub auto-links and auto-closes it on merge. For issues that are related but not fully resolved, use \`Refs #<n>\` instead.
+5. Create a draft pull request using \`gh pr create --draft${this.config.baseBranch ? ` --base ${this.config.baseBranch}` : ""}\` with a descriptive title and the body prepared above. Add the following footer at the end of the PR description:
 \`\`\`
 ---
 *Created with [PostHog Code](https://posthog.com/code?ref=pr)*

package/src/server/question-relay.test.ts CHANGED

@@ -1,11 +1,11 @@
 import { type SetupServerApi, setupServer } from "msw/node";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
-import { classifyAgentError } from "../adapters/claude/conversion/sdk-to-acp";
+import { classifyAgentError } from "../adapters/error-classification";
 import type { PostHogAPIClient } from "../posthog-api";
 import { createTestRepo, type TestRepo } from "../test/fixtures/api";
 import { createPostHogHandlers } from "../test/mocks/msw-handlers";
 import type { Task, TaskRun } from "../types";
-import { AgentServer } from "./agent-server";
+import { AgentServer, UPSTREAM_PROVIDER_FAILURE_MESSAGE } from "./agent-server";
 interface TestableAgentServer {
   posthogAPI: PostHogAPIClient;
@@ -76,10 +76,28 @@ function createTransientConnectionError(): Error & {
   return error;
 }
+function createUpstreamProviderFailureError(): Error & {
+  data: { classification: string; result: string };
+} {
+  const result =
+    'API Error: 529 {"error":{"message":"{\\"type\\":\\"error\\",\\"error\\":{\\"type\\":\\"overloaded_error\\",\\"message\\":\\"Overloaded\\"}}","type":"api_error"}}';
+  const error = new Error(result) as Error & {
+    data: { classification: string; result: string };
+  };
+  error.data = {
+    classification: "upstream_provider_failure",
+    result,
+  };
+  return error;
+}
 describe("Question relay", () => {
   it.each([
     ["API Error: terminated", "upstream_stream_terminated"],
     ["API Error: Connection error", "upstream_connection_error"],
+    ["API Error: 429 rate_limit_error", "upstream_provider_failure"],
+    ["API Error: 529 overloaded_error", "upstream_provider_failure"],
+    ["API Error: 503 internal_error", "upstream_provider_failure"],
     ["something else", "agent_error"],
     [undefined, "agent_error"],
   ])("classifies %p as %s", (message, expected) => {
@@ -590,12 +608,56 @@ describe("Question relay", () => {
         "test-run-id",
         {
           status: "failed",
-          error_message: "Upstream LLM stream terminated",
+          error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
+        },
+      );
+    });
+    it("surfaces upstream provider failures with a retryable message", async () => {
+      vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
+        id: "test-task-id",
+        title: "t",
+        description: "original task description",
+      } as unknown as Task);
+      vi.spyOn(server.posthogAPI, "getTaskRun").mockResolvedValue({
+        id: "test-run-id",
+        task: "test-task-id",
+        state: {},
+      } as unknown as TaskRun);
+      const promptSpy = vi
+        .fn()
+        .mockRejectedValueOnce(createUpstreamProviderFailureError());
+      const updateTaskRunSpy = vi
+        .spyOn(server.posthogAPI, "updateTaskRun")
+        .mockResolvedValue({} as TaskRun);
+      server.session = {
+        payload: TEST_PAYLOAD,
+        acpSessionId: "acp-session",
+        clientConnection: { prompt: promptSpy },
+        logWriter: {
+          flushAll: vi.fn().mockResolvedValue(undefined),
+          getFullAgentResponse: vi.fn().mockReturnValue(null),
+          resetTurnMessages: vi.fn(),
+          flush: vi.fn().mockResolvedValue(undefined),
+          isRegistered: vi.fn().mockReturnValue(true),
+        },
+      };
+      await server.sendInitialTaskMessage(TEST_PAYLOAD);
+      expect(promptSpy).toHaveBeenCalledTimes(1);
+      expect(updateTaskRunSpy).toHaveBeenCalledWith(
+        "test-task-id",
+        "test-run-id",
+        {
+          status: "failed",
+          error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
         },
       );
     });
-    it("surfaces upstream connection errors with the connection-specific message", async () => {
+    it("surfaces upstream connection errors with the shared provider failure message", async () => {
       vi.spyOn(server.posthogAPI, "getTask").mockResolvedValue({
         id: "test-task-id",
         title: "t",
@@ -634,7 +696,7 @@ describe("Question relay", () => {
         "test-run-id",
         {
           status: "failed",
-          error_message: "Upstream LLM connection error",
+          error_message: UPSTREAM_PROVIDER_FAILURE_MESSAGE,
         },
       );
     });