npm - @posthog/agent - Versions diffs - 1.16.6 → 1.18.0 - Mend

@posthog/agent 1.16.6 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227)

package/src/agent.ts CHANGED

@@ -10,7 +10,6 @@ import type { ProviderAdapter } from './adapters/types.js';
 import { Logger } from './utils/logger.js';
 import { PromptBuilder } from './prompt-builder.js';
 import { TaskProgressReporter } from './task-progress-reporter.js';
-import { AISDKExtractor, type StructuredExtractor, type ExtractedQuestion, type ExtractedQuestionWithAnswer } from './structured-extraction.js';
 import { TASK_WORKFLOW } from './workflow/config.js';
 import type { WorkflowRuntime } from './workflow/types.js';
@@ -26,7 +25,6 @@ export class Agent {
     private logger: Logger;
     private progressReporter: TaskProgressReporter;
     private promptBuilder: PromptBuilder;
-    private extractor?: StructuredExtractor;
     private mcpServers?: Record<string, any>;
     private canUseTool?: CanUseTool;
     public debug: boolean;
@@ -92,7 +90,6 @@ export class Agent {
             logger: this.logger.child('PromptBuilder')
         });
         this.progressReporter = new TaskProgressReporter(this.posthogAPI, this.logger);
-        this.extractor = new AISDKExtractor({apiKey: config.posthogApiKey, gatewayUrl: this.posthogAPI?.getLlmGatewayUrl() ?? '', logger: this.logger.child('AISDKExtractor')});
     }
     /**
@@ -159,7 +156,6 @@ export class Agent {
                 adapter: this.adapter,
                 mcpServers: this.mcpServers,
                 posthogAPI: this.posthogAPI,
-                extractor: this.extractor,
                 emitEvent: (event: any) => this.emitEvent(event),
                 stepResults: {},
             };
@@ -283,32 +279,11 @@ export class Agent {
         return await this.fileManager.readPlan(taskId);
     }
-    async extractQuestionsFromResearch(taskId: string, includeAnswers: boolean = false): Promise<ExtractedQuestion[] | ExtractedQuestionWithAnswer[]> {
-        this.logger.info('Extracting questions from research.md', { taskId, includeAnswers });
-        if (!this.extractor) {
-            throw new Error('OpenAI extractor not initialized. Ensure the LLM gateway is configured.');
-        }
-        const researchContent = await this.fileManager.readResearch(taskId);
-        if (!researchContent) {
-            throw new Error('research.md not found for task ' + taskId);
-        }
-        if (includeAnswers) {
-            return await this.extractor.extractQuestionsWithAnswers(researchContent);
-        } else {
-            return await this.extractor.extractQuestions(researchContent);
-        }
-    }
     // Git operations for task execution
     async createPlanningBranch(taskId: string): Promise<string> {
         this.logger.info('Creating planning branch', { taskId });
         const branchName = await this.gitManager.createTaskPlanningBranch(taskId);
         this.logger.debug('Planning branch created', { taskId, branchName });
-        // Only create gitignore after we're on the new branch
-        await this.fileManager.ensureGitignore();
         return branchName;
     }
@@ -422,12 +397,17 @@ Generated by PostHog Agent`;
             const branchName = await this.gitManager.createTaskBranch(taskSlug);
             this.emitEvent(this.adapter.createStatusEvent('branch_created', { branch: branchName }));
-            await this.fileManager.ensureGitignore();
             await this.gitManager.addAllPostHogFiles();
+            // Only commit if there are changes or we're in cloud mode
             if (isCloudMode) {
                 await this.gitManager.commitAndPush(`Initialize task ${taskSlug}`, { allowEmpty: true });
             } else {
-                await this.gitManager.commitChanges(`Initialize task ${taskSlug}`);
+                // Check if there are any changes before committing
+                const hasChanges = await this.gitManager.hasStagedChanges();
+                if (hasChanges) {
+                    await this.gitManager.commitChanges(`Initialize task ${taskSlug}`);
+                }
             }
         } else {
             this.logger.info('Switching to existing task branch', { branch: existingBranch });
@@ -446,10 +426,6 @@ Generated by PostHog Agent`;
         if (resolvedToken) {
             process.env.OPENAI_API_KEY = resolvedToken;
         }
-        if (!this.extractor) {
-            this.extractor = new AISDKExtractor({apiKey: resolvedToken || '', gatewayUrl: resolvedGatewayUrl || '', logger: this.logger.child('AISDKExtractor')});
-        }
     }
     private async ensurePullRequest(task: Task, stepResults: Record<string, any>): Promise<void> {

package/src/agents/research.ts CHANGED

@@ -1,80 +1,111 @@
 export const RESEARCH_SYSTEM_PROMPT = `<role>
-PostHog AI Research Agent — analyze codebases to understand implementation context and identify areas of focus for development tasks.
+PostHog AI Research Agent — analyze codebases to evaluate task actionability and identify missing information.
 </role>
 <constraints>
 - Read-only: analyze files, search code, explore structure
 - No modifications or code changes
+- Output structured JSON only
 </constraints>
 <objective>
-Your PRIMARY goal is to understand the codebase thoroughly and provide context for the planning phase.
+Your PRIMARY goal is to evaluate whether a task is actionable and assign an actionability score.
-ONLY generate clarifying questions if:
-- The task description is genuinely vague or ambiguous
-- There are multiple valid architectural approaches with significant tradeoffs
-- Critical information is missing that cannot be inferred from the codebase
+Calculate an actionabilityScore (0-1) based on:
+- **Task clarity** (0.4 weight): Is the task description specific and unambiguous?
+- **Codebase context** (0.3 weight): Can you locate the relevant code and patterns?
+- **Architectural decisions** (0.2 weight): Are the implementation approaches clear?
+- **Dependencies** (0.1 weight): Are required dependencies and constraints understood?
-DO NOT ask questions like "how should I fix this" or "what approach do you prefer" — that defeats the purpose of autonomous task execution. The user has already specified what they want done.
+If actionabilityScore < 0.7, generate specific clarifying questions to increase confidence.
+DO NOT ask questions like "how should I fix this" — focus on missing information that prevents confident planning.
 </objective>
 <process>
 1. Explore repository structure and identify relevant files/components
 2. Understand existing patterns, conventions, and dependencies
-3. Locate similar implementations or related code
-4. Identify the key areas of the codebase that will be affected
-5. Document your findings to provide context for planning
-6. ONLY if genuinely needed: generate 2-3 specific clarification questions
+3. Calculate actionabilityScore based on clarity, context, architecture, and dependencies
+4. Identify key files that will need modification
+5. If score < 0.7: generate 2-4 specific questions to resolve blockers
+6. Output JSON matching ResearchEvaluation schema
 </process>
 <output_format>
-Output ONLY the markdown artifact with no preamble:
-\`\`\`markdown
-# Research Findings
-## Codebase Analysis
-[Brief summary of relevant code structure, patterns, and files]
-## Key Areas of Focus
-[List specific files/components that need modification]
-## Implementation Context
-[Important patterns, dependencies, or constraints found in the code]
-## Clarifying Questions
-[ONLY include this section if it will increase the quality of the plan]
-## Question 1: [Specific architectural decision]
-**Options:**
-- a) [Concrete option with file references]
-- b) [Alternative with file references]
-- c) Something else (please specify)
-\`\`\`
-Format requirements:
-- Use "## Question N:" for question headers (h2)
-- Follow with "**Options:**" on its own line
-- Start options with "- a)", "- b)", "- c)"
-- Always include "c) Something else (please specify)"
-- Max 4 questions total
+Output ONLY valid JSON with no markdown wrappers, no preamble, no explanation:
+{
+  "actionabilityScore": 0.85,
+  "context": "Brief 2-3 sentence summary of the task and implementation approach",
+  "keyFiles": ["path/to/file1.ts", "path/to/file2.ts"],
+  "blockers": ["Optional: what's preventing full confidence"],
+  "questions": [
+    {
+      "id": "q1",
+      "question": "Specific architectural decision needed?",
+      "options": [
+        "First approach with concrete details",
+        "Alternative approach with concrete details",
+        "Third option if needed"
+      ]
+    }
+  ]
+}
+Rules:
+- actionabilityScore: number between 0 and 1
+- context: concise summary for planning phase
+- keyFiles: array of file paths that need modification
+- blockers: optional array explaining confidence gaps
+- questions: ONLY include if actionabilityScore < 0.7
+- Each question must have 2-3 options (maximum 3)
+- Max 3 questions total
 </output_format>
-<examples>
+<scoring_examples>
+<example score="0.9">
+Task: "Fix typo in login button text"
+Reasoning: Completely clear task, found exact component, no architectural decisions
+</example>
+<example score="0.75">
+Task: "Add caching to API endpoints"
+Reasoning: Clear goal, found endpoints, but multiple caching strategies possible
+</example>
+<example score="0.55">
+Task: "Improve performance"
+Reasoning: Vague task, unclear scope, needs questions about which areas to optimize
+Questions needed: Which features are slow? What metrics define success?
+</example>
+<example score="0.3">
+Task: "Add the new feature"
+Reasoning: Extremely vague, no context, cannot locate relevant code
+Questions needed: What feature? Which product area? What should it do?
+</example>
+</scoring_examples>
+<question_examples>
 <good_example>
-Task: "Fix authentication bug in login flow"
-Output: Research findings showing auth flow files, patterns used, NO questions needed
+{
+  "id": "q1",
+  "question": "Which caching layer should we use for API responses?",
+  "options": [
+    "Redis (existing infrastructure, requires setup)",
+    "In-memory cache (simpler, but not distributed)",
+    "Browser-side caching only (minimal backend changes)"
+  ]
+}
 </good_example>
 <bad_example>
-Task: "Fix authentication bug"
-Output: "How should I fix the authentication? a) Fix it one way b) Fix it another way"
-Reason: Don't ask HOW to do the task — that's what the agent is for
+{
+  "id": "q1",
+  "question": "How should I implement this?",
+  "options": ["One way", "Another way"]
+}
+Reason: Too vague, doesn't explain the tradeoffs
 </bad_example>
-<good_example>
-Task: "Add caching to API endpoints"
-Output: Research showing existing cache implementations, question about cache backend choice IF multiple production systems are already in use
-</good_example>
-</examples>`;
+</question_examples>`;

package/src/file-manager.ts CHANGED

@@ -1,6 +1,6 @@
 import { promises as fs } from 'fs';
-import { join, dirname } from 'path';
-import type { SupportingFile } from './types.js';
+import { join } from 'path';
+import type { SupportingFile, ResearchEvaluation } from './types.js';
 import { Logger } from './utils/logger.js';
 export interface TaskFile {
@@ -9,24 +9,6 @@ export interface TaskFile {
   type: 'plan' | 'context' | 'reference' | 'output' | 'artifact';
 }
-export interface QuestionData {
-  id: string;
-  question: string;
-  options: string[];
-}
-export interface AnswerData {
-  questionId: string;
-  selectedOption: string;
-  customInput?: string;
-}
-export interface QuestionsFile {
-  questions: QuestionData[];
-  answered: boolean;
-  answers: AnswerData[] | null;
-}
 export class PostHogFileManager {
   private repositoryPath: string;
   private logger: Logger;
@@ -170,48 +152,35 @@ export class PostHogFileManager {
     return await this.readTaskFile(taskId, 'requirements.md');
   }
-  async writeResearch(taskId: string, content: string): Promise<void> {
+  async writeResearch(taskId: string, data: ResearchEvaluation): Promise<void> {
     this.logger.debug('Writing research', {
       taskId,
-      contentLength: content.length,
-      contentPreview: content.substring(0, 200)
+      score: data.actionabilityScore,
+      hasQuestions: !!data.questions,
+      questionCount: data.questions?.length ?? 0,
+      answered: data.answered ?? false,
     });
     await this.writeTaskFile(taskId, {
-      name: 'research.md',
-      content: content,
-      type: 'artifact'
-    });
-    this.logger.info('Research file written', { taskId });
-  }
-  async readResearch(taskId: string): Promise<string | null> {
-    return await this.readTaskFile(taskId, 'research.md');
-  }
-  async writeQuestions(taskId: string, data: QuestionsFile): Promise<void> {
-    this.logger.debug('Writing questions', {
-      taskId,
-      questionCount: data.questions.length,
-      answered: data.answered,
-    });
-    await this.writeTaskFile(taskId, {
-      name: 'questions.json',
+      name: 'research.json',
       content: JSON.stringify(data, null, 2),
       type: 'artifact'
     });
-    this.logger.info('Questions file written', { taskId });
+    this.logger.info('Research file written', {
+      taskId,
+      score: data.actionabilityScore,
+      hasQuestions: !!data.questions,
+      answered: data.answered ?? false,
+    });
   }
-  async readQuestions(taskId: string): Promise<QuestionsFile | null> {
+  async readResearch(taskId: string): Promise<ResearchEvaluation | null> {
     try {
-      const content = await this.readTaskFile(taskId, 'questions.json');
-      return content ? JSON.parse(content) as QuestionsFile : null;
+      const content = await this.readTaskFile(taskId, 'research.json');
+      return content ? JSON.parse(content) as ResearchEvaluation : null;
     } catch (error) {
-      this.logger.debug('Failed to parse questions.json', { error });
+      this.logger.debug('Failed to parse research.json', { error });
       return null;
     }
   }
@@ -241,28 +210,4 @@ export class PostHogFileManager {
     return files;
   }
-  async ensureGitignore(): Promise<void> {
-    const gitignorePath = join(this.repositoryPath, '.posthog', '.gitignore');
-    const gitignoreContent = `# PostHog task artifacts - customize as needed
-# Exclude temporary files
-*/temp/
-*/cache/
-*/.env
-*/.secrets
-# Include plans and documentation by default
-!*/plan.md
-!*/context.md
-!*/requirements.md
-!*/README.md
-`;
-    try {
-      await fs.access(gitignorePath);
-    } catch {
-      await fs.mkdir(dirname(gitignorePath), { recursive: true });
-      await fs.writeFile(gitignorePath, gitignoreContent, 'utf8');
-    }
-  }
 }

package/src/git-manager.ts CHANGED

@@ -135,7 +135,13 @@ export class GitManager {
   }
   async addAllPostHogFiles(): Promise<void> {
-    await this.runGitCommand('add .posthog/');
+    try {
+      // Use -A flag to add all changes (including new files) and ignore errors if directory is empty
+      await this.runGitCommand('add -A .posthog/');
+    } catch (error) {
+      // If the directory doesn't exist or has no files, that's fine - just log and continue
+      this.logger.debug('No PostHog files to add', { error });
+    }
   }
   async commitChanges(message: string, options?: {

package/src/posthog-api.ts CHANGED

@@ -11,7 +11,6 @@ export interface TaskRunUpdate {
   status?: TaskRun["status"];
   branch?: string | null;
   current_stage?: string | null;
-  log?: LogEntry[];
   error_message?: string | null;
   output?: Record<string, unknown> | null;
   state?: Record<string, unknown>;
@@ -171,6 +170,39 @@ export class PostHogAPIClient {
     });
   }
+  /**
+   * Fetch logs from S3 using presigned URL from TaskRun
+   * @param taskRun - The task run containing the log_url
+   * @returns Array of log entries, or empty array if no logs available
+   */
+  async fetchTaskRunLogs(taskRun: TaskRun): Promise<LogEntry[]> {
+    if (!taskRun.log_url) {
+      return [];
+    }
+    try {
+      const response = await fetch(taskRun.log_url);
+      if (!response.ok) {
+        throw new Error(`Failed to fetch logs: ${response.status} ${response.statusText}`);
+      }
+      const content = await response.text();
+      if (!content.trim()) {
+        return [];
+      }
+      // Parse newline-delimited JSON
+      return content
+        .trim()
+        .split('\n')
+        .map(line => JSON.parse(line) as LogEntry);
+    } catch (error) {
+      throw new Error(`Failed to fetch task run logs: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
   /**
    * Fetch error details from PostHog error tracking
    */

package/src/task-progress-reporter.ts CHANGED

@@ -41,7 +41,6 @@ export class TaskProgressReporter {
     try {
       const run = await this.posthogAPI.createTaskRun(taskId, {
         status: 'started',
-        log: [],
       });
       this.taskRun = run;
       this.outputLog = [];

package/src/templates/plan-template.md CHANGED

@@ -38,8 +38,4 @@ path/to/existing/file.ts - Changes needed
 - Key architectural decisions
 - Potential risks and mitigation
-- Testing approach
----
-*Generated by PostHog Agent*
+- Testing approach

package/src/types.ts CHANGED

@@ -1,6 +1,6 @@
 // import and export to keep a single type file
-import type { CanUseTool, PermissionResult } from '@anthropic-ai/claude-agent-sdk/sdkTypes.js';
+import type { CanUseTool, PermissionResult } from '@anthropic-ai/claude-agent-sdk';
 export type { CanUseTool, PermissionResult };
 // PostHog Task model (matches Array's OpenAPI schema)
@@ -39,7 +39,7 @@ export interface TaskRun {
   team: number;
   branch: string | null;
   status: 'started' | 'in_progress' | 'completed' | 'failed';
-  log: LogEntry[]; // Array of log entry objects
+  log_url?: string; // Presigned S3 URL for log access (valid for 1 hour)
   error_message: string | null;
   output: Record<string, unknown> | null; // Structured output (PR URL, commit SHA, etc.)
   state: Record<string, unknown>; // Intermediate run state (defaults to {}, never null)
@@ -345,4 +345,27 @@ export interface UrlMention {
   type: ResourceType;
   id?: string;
   label?: string;
+}
+// Research evaluation types
+export interface ResearchQuestion {
+  id: string;
+  question: string;
+  options: string[];
+}
+export interface ResearchAnswer {
+  questionId: string;
+  selectedOption: string;
+  customInput?: string;
+}
+export interface ResearchEvaluation {
+  actionabilityScore: number;    // 0-1 confidence score
+  context: string;               // brief summary for planning
+  keyFiles: string[];            // files needing modification
+  blockers?: string[];           // what's preventing full confidence
+  questions?: ResearchQuestion[]; // only if score < 0.7
+  answered?: boolean;            // whether questions have been answered
+  answers?: ResearchAnswer[];    // user's answers to questions
 }

package/src/workflow/steps/plan.ts CHANGED

@@ -26,8 +26,8 @@ export const planStep: WorkflowStepRunner = async ({ step, context }) => {
         return { status: 'skipped' };
     }
-    const questionsData = await fileManager.readQuestions(task.id);
-    if (!questionsData || !questionsData.answered) {
+    const researchData = await fileManager.readResearch(task.id);
+    if (researchData?.questions && !researchData.answered) {
         stepLogger.info('Waiting for answered research questions', { taskId: task.id });
         emitEvent(adapter.createStatusEvent('phase_complete', { phase: 'research_questions' }));
         return { status: 'skipped', halt: true };
@@ -35,29 +35,36 @@ export const planStep: WorkflowStepRunner = async ({ step, context }) => {
     stepLogger.info('Starting planning phase', { taskId: task.id });
     emitEvent(adapter.createStatusEvent('phase_start', { phase: 'planning' }));
-    const researchContent = await fileManager.readResearch(task.id);
     let researchContext = '';
-    if (researchContent) {
-        researchContext += `## Research Analysis\n\n${researchContent}\n\n`;
-    }
-    researchContext += `## Implementation Decisions\n\n`;
-    for (const question of questionsData.questions) {
-        const answer = questionsData.answers?.find(
-            (a: any) => a.questionId === question.id
-        );
+    if (researchData) {
+        researchContext += `## Research Context\n\n${researchData.context}\n\n`;
+        if (researchData.keyFiles.length > 0) {
+            researchContext += `**Key Files:**\n${researchData.keyFiles.map(f => `- ${f}`).join('\n')}\n\n`;
+        }
+        if (researchData.blockers && researchData.blockers.length > 0) {
+            researchContext += `**Considerations:**\n${researchData.blockers.map(b => `- ${b}`).join('\n')}\n\n`;
+        }
-        researchContext += `### ${question.question}\n\n`;
-        if (answer) {
-            researchContext += `**Selected:** ${answer.selectedOption}\n`;
-            if (answer.customInput) {
-                researchContext += `**Details:** ${answer.customInput}\n`;
+        // Add answered questions if they exist
+        if (researchData.questions && researchData.answers && researchData.answered) {
+            researchContext += `## Implementation Decisions\n\n`;
+            for (const question of researchData.questions) {
+                const answer = researchData.answers.find(
+                    (a) => a.questionId === question.id
+                );
+                researchContext += `### ${question.question}\n\n`;
+                if (answer) {
+                    researchContext += `**Selected:** ${answer.selectedOption}\n`;
+                    if (answer.customInput) {
+                        researchContext += `**Details:** ${answer.customInput}\n`;
+                    }
+                } else {
+                    researchContext += `**Selected:** Not answered\n`;
+                }
+                researchContext += `\n`;
             }
-        } else {
-            researchContext += `**Selected:** Not answered\n`;
         }
-        researchContext += `\n`;
     }
     const planningPrompt = await promptBuilder.buildPlanningPrompt(task, cwd);