@posthog/agent 1.9.0 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +8 -5
  2. package/dist/index.d.ts +3 -1
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/src/agent-registry.d.ts.map +1 -1
  5. package/dist/src/agent-registry.js +6 -0
  6. package/dist/src/agent-registry.js.map +1 -1
  7. package/dist/src/agent.d.ts +5 -0
  8. package/dist/src/agent.d.ts.map +1 -1
  9. package/dist/src/agent.js +370 -29
  10. package/dist/src/agent.js.map +1 -1
  11. package/dist/src/agents/research.d.ts +2 -0
  12. package/dist/src/agents/research.d.ts.map +1 -0
  13. package/dist/src/agents/research.js +105 -0
  14. package/dist/src/agents/research.js.map +1 -0
  15. package/dist/src/file-manager.d.ts +19 -0
  16. package/dist/src/file-manager.d.ts.map +1 -1
  17. package/dist/src/file-manager.js +39 -0
  18. package/dist/src/file-manager.js.map +1 -1
  19. package/dist/src/git-manager.d.ts +4 -0
  20. package/dist/src/git-manager.d.ts.map +1 -1
  21. package/dist/src/git-manager.js +41 -0
  22. package/dist/src/git-manager.js.map +1 -1
  23. package/dist/src/posthog-api.d.ts +16 -57
  24. package/dist/src/posthog-api.d.ts.map +1 -1
  25. package/dist/src/posthog-api.js +38 -38
  26. package/dist/src/posthog-api.js.map +1 -1
  27. package/dist/src/prompt-builder.d.ts +1 -0
  28. package/dist/src/prompt-builder.d.ts.map +1 -1
  29. package/dist/src/prompt-builder.js +40 -0
  30. package/dist/src/prompt-builder.js.map +1 -1
  31. package/dist/src/stage-executor.d.ts +1 -0
  32. package/dist/src/stage-executor.d.ts.map +1 -1
  33. package/dist/src/stage-executor.js +43 -0
  34. package/dist/src/stage-executor.js.map +1 -1
  35. package/dist/src/structured-extraction.d.ts +22 -0
  36. package/dist/src/structured-extraction.d.ts.map +1 -0
  37. package/dist/src/structured-extraction.js +136 -0
  38. package/dist/src/structured-extraction.js.map +1 -0
  39. package/dist/src/task-progress-reporter.d.ts +2 -5
  40. package/dist/src/task-progress-reporter.d.ts.map +1 -1
  41. package/dist/src/task-progress-reporter.js +37 -39
  42. package/dist/src/task-progress-reporter.js.map +1 -1
  43. package/dist/src/types.d.ts +31 -3
  44. package/dist/src/types.d.ts.map +1 -1
  45. package/dist/src/types.js.map +1 -1
  46. package/dist/src/workflow-types.d.ts +1 -1
  47. package/dist/src/workflow-types.d.ts.map +1 -1
  48. package/package.json +4 -3
  49. package/src/agent-registry.ts +6 -0
  50. package/src/agent.ts +409 -26
  51. package/src/agents/research.ts +103 -0
  52. package/src/file-manager.ts +64 -0
  53. package/src/git-manager.ts +52 -0
  54. package/src/posthog-api.ts +57 -92
  55. package/src/prompt-builder.ts +53 -0
  56. package/src/stage-executor.ts +50 -0
  57. package/src/structured-extraction.ts +167 -0
  58. package/src/task-progress-reporter.ts +38 -44
  59. package/src/types.ts +39 -3
  60. package/src/workflow-types.ts +1 -1
@@ -9,6 +9,24 @@ export interface TaskFile {
9
9
  type: 'plan' | 'context' | 'reference' | 'output' | 'artifact';
10
10
  }
11
11
 
12
/** One clarifying question together with the choices offered for it. */
export interface QuestionData {
  /** Identifier of the question. */
  id: string;
  /** Text of the question. */
  question: string;
  /** Candidate answers, as plain strings. */
  options: Array<string>;
}

/** The answer recorded for a single question. */
export interface AnswerData {
  /** Presumably the `id` of the `QuestionData` being answered — confirm against the producer. */
  questionId: string;
  /** The option that was chosen. */
  selectedOption: string;
  /** Optional free-form text supplied alongside the selection. */
  customInput?: string;
}

/** Document shape that `writeQuestions` serializes into `questions.json`. */
export interface QuestionsFile {
  questions: Array<QuestionData>;
  /** Flag stored next to the questions; its exact lifecycle is decided by callers. */
  answered: boolean;
  /** Recorded answers, or `null` when none are stored. */
  answers: Array<AnswerData> | null;
}
29
+
12
30
  export class PostHogFileManager {
13
31
  private repositoryPath: string;
14
32
  private logger: Logger;
@@ -152,6 +170,52 @@ export class PostHogFileManager {
152
170
  return await this.readTaskFile(taskId, 'requirements.md');
153
171
  }
154
172
 
173
/**
 * Store research output for a task as the `research.md` artifact.
 *
 * Logs the content length and its first 200 characters at debug level before
 * writing, and an info message once the write has completed.
 *
 * @param taskId  Task the file belongs to.
 * @param content Full text to store.
 */
async writeResearch(taskId: string, content: string): Promise<void> {
  const contentLength = content.length;
  const contentPreview = content.substring(0, 200);
  this.logger.debug('Writing research', { taskId, contentLength, contentPreview });

  await this.writeTaskFile(taskId, { name: 'research.md', content, type: 'artifact' });

  this.logger.info('Research file written', { taskId });
}
188
+
189
/**
 * Read the `research.md` file of a task.
 *
 * @param taskId Task whose research file is requested.
 * @returns whatever `readTaskFile` yields for `research.md` (a string or `null`).
 */
async readResearch(taskId: string): Promise<string | null> {
  const research = await this.readTaskFile(taskId, 'research.md');
  return research;
}
192
+
193
/**
 * Serialize a questions document to the `questions.json` artifact of a task.
 *
 * The document is written as JSON with two-space indentation.
 *
 * @param taskId Task the file belongs to.
 * @param data   Questions (and any answers) to persist.
 */
async writeQuestions(taskId: string, data: QuestionsFile): Promise<void> {
  const { questions, answered } = data;
  this.logger.debug('Writing questions', { taskId, questionCount: questions.length, answered });

  const serialized = JSON.stringify(data, null, 2);
  await this.writeTaskFile(taskId, { name: 'questions.json', content: serialized, type: 'artifact' });

  this.logger.info('Questions file written', { taskId });
}
208
+
209
/**
 * Load and parse the `questions.json` file of a task.
 *
 * This is a best-effort read: every failure is logged at debug level and
 * reported as `null` rather than thrown.
 *
 * @param taskId Task whose questions file is requested.
 * @returns the parsed document, or `null` when the file has no content, is not
 *          valid JSON, is not an object carrying a `questions` array, or when
 *          reading it throws.
 */
async readQuestions(taskId: string): Promise<QuestionsFile | null> {
  try {
    const content = await this.readTaskFile(taskId, 'questions.json');
    if (!content) {
      return null;
    }

    // JSON.parse accepts any JSON value; make sure we actually got a questions
    // document before handing it to callers typed as QuestionsFile.
    const parsed: unknown = JSON.parse(content);
    const hasQuestions =
      typeof parsed === 'object' &&
      parsed !== null &&
      Array.isArray((parsed as { questions?: unknown }).questions);
    if (!hasQuestions) {
      this.logger.debug('Failed to parse questions.json', { error: 'Unexpected document shape' });
      return null;
    }

    return parsed as QuestionsFile;
  } catch (error) {
    this.logger.debug('Failed to parse questions.json', { error });
    return null;
  }
}
218
+
155
219
  async getTaskFiles(taskId: string): Promise<SupportingFile[]> {
156
220
  const fileNames = await this.listTaskFiles(taskId);
157
221
  const files: SupportingFile[] = [];
@@ -341,4 +341,56 @@ Generated by PostHog Agent`;
341
341
  throw new Error(`Failed to create PR: ${error}`);
342
342
  }
343
343
  }
344
+
345
/**
 * Find an existing branch for a task slug.
 *
 * Lists local and remote branches via `git branch --list --all`, strips the
 * current-branch marker (`* `) and a leading `remotes/origin/`, then looks for
 * names beginning with `posthog/task-<taskSlug>`.
 *
 * An exact match is preferred; otherwise the first name with that prefix
 * (e.g. one carrying a suffix) is returned.
 *
 * @param taskSlug Slug embedded in the branch name.
 * @returns the matching branch name without the `remotes/origin/` prefix, or
 *          `null` when nothing matches or the git command fails.
 */
async getTaskBranch(taskSlug: string): Promise<string | null> {
  try {
    const branches = await this.runGitCommand('branch --list --all');
    const branchPattern = `posthog/task-${taskSlug}`;

    let firstPrefixMatch: string | null = null;
    for (const rawLine of branches.split('\n')) {
      const branchName = rawLine
        .trim()
        .replace(/^\*\s+/, '')
        // Anchor the remote prefix so only a leading "remotes/origin/" is dropped.
        .replace(/^remotes\/origin\//, '');

      if (branchName === branchPattern) {
        // The exact branch always wins over suffixed variants listed before it.
        return branchName;
      }
      if (firstPrefixMatch === null && branchName.startsWith(branchPattern)) {
        firstPrefixMatch = branchName;
      }
    }

    return firstPrefixMatch;
  } catch (error) {
    this.logger.debug('Failed to get task branch', { taskSlug, error });
    return null;
  }
}
366
+
367
/**
 * Commit the staged changes and push the current branch.
 *
 * When nothing is staged and `allowEmpty` is not set, the call logs a debug
 * message and returns without committing or pushing. The author is overridden
 * only when both `authorName` and `authorEmail` are set on this instance.
 *
 * @param message Commit message.
 * @param options `allowEmpty` adds `--allow-empty` and commits even when nothing is staged.
 * @throws whatever `runGitCommand`, `getCurrentBranch` or `pushBranch` throw.
 */
async commitAndPush(message: string, options?: { allowEmpty?: boolean }): Promise<void> {
  // NOTE(review): assumes runGitCommand hands this string to a POSIX shell — confirm.
  // Inside double quotes the shell still interprets \ " $ and `, so all four
  // must be escaped; escaping only the quote leaves the message open to
  // corruption and command substitution.
  const shellQuote = (value: string): string => {
    const escaped = value.replace(/[\\"$`]/g, '\\$&');
    return '"' + escaped + '"';
  };

  const allowEmpty = Boolean(options?.allowEmpty);
  const hasChanges = await this.hasStagedChanges();

  if (!hasChanges && !allowEmpty) {
    this.logger.debug('No changes to commit, skipping');
    return;
  }

  let command = `commit -m ${shellQuote(message)}`;

  if (allowEmpty) {
    command += ' --allow-empty';
  }

  const authorName = this.authorName;
  const authorEmail = this.authorEmail;

  if (authorName && authorEmail) {
    command += ` --author=${shellQuote(`${authorName} <${authorEmail}>`)}`;
  }

  await this.runGitCommand(command);

  // Push to origin
  const currentBranch = await this.getCurrentBranch();
  await this.pushBranch(currentBranch);

  this.logger.info('Committed and pushed changes', { branch: currentBranch, message });
}
344
396
  }
@@ -1,4 +1,4 @@
1
- import type { Task, SupportingFile, PostHogAPIConfig, PostHogResource, ResourceType, UrlMention } from './types.js';
1
+ import type { Task, TaskRun, LogEntry, SupportingFile, PostHogAPIConfig, PostHogResource, ResourceType, UrlMention } from './types.js';
2
2
  import type { WorkflowDefinition, AgentDefinition } from './workflow-types.js';
3
3
 
4
4
  interface PostHogApiResponse<T> {
@@ -8,52 +8,14 @@ interface PostHogApiResponse<T> {
8
8
  previous?: string | null;
9
9
  }
10
10
 
11
- interface TaskProgressResponse {
12
- has_progress: boolean;
13
- id?: string;
14
- status?: "started" | "in_progress" | "completed" | "failed";
15
- current_step?: string;
16
- completed_steps?: number;
17
- total_steps?: number;
18
- progress_percentage?: number;
19
- output_log?: string;
20
- error_message?: string;
21
- created_at?: string;
22
- updated_at?: string;
23
- completed_at?: string;
24
- workflow_id?: string;
25
- workflow_run_id?: string;
26
- message?: string;
27
- }
28
-
29
- export interface TaskProgressRecord {
30
- id: string;
31
- task: string;
32
- status: "started" | "in_progress" | "completed" | "failed";
33
- current_step?: string | null;
34
- completed_steps?: number | null;
35
- total_steps?: number | null;
36
- progress_percentage?: number | null;
37
- output_log?: string | null;
38
- error_message?: string | null;
39
- workflow_id?: string | null;
40
- workflow_run_id?: string | null;
41
- activity_id?: string | null;
42
- created_at: string;
43
- updated_at: string;
44
- completed_at?: string | null;
45
- }
46
-
47
- export interface TaskProgressUpdate {
48
- status?: TaskProgressRecord["status"];
49
- current_step?: string | null;
50
- completed_steps?: number | null;
51
- total_steps?: number | null;
52
- output_log?: string | null;
11
+ export interface TaskRunUpdate {
12
+ status?: TaskRun["status"];
13
+ branch?: string | null;
14
+ current_stage?: string | null;
15
+ log?: LogEntry[];
53
16
  error_message?: string | null;
54
- workflow_id?: string | null;
55
- workflow_run_id?: string | null;
56
- activity_id?: string | null;
17
+ output?: Record<string, unknown> | null;
18
+ state?: Record<string, unknown>;
57
19
  }
58
20
 
59
21
  export class PostHogAPIClient {
@@ -172,65 +134,76 @@ export class PostHogAPIClient {
172
134
  });
173
135
  }
174
136
 
175
- async updateTaskStage(taskId: string, stageId: string): Promise<Task> {
137
+ // TaskRun methods
138
+ async listTaskRuns(taskId: string): Promise<TaskRun[]> {
176
139
  const teamId = await this.getTeamId();
177
- return this.apiRequest<Task>(`/api/projects/${teamId}/tasks/${taskId}/update_stage/`, {
178
- method: 'PATCH',
179
- body: JSON.stringify({ current_stage: stageId }),
180
- });
140
+ const response = await this.apiRequest<PostHogApiResponse<TaskRun>>(
141
+ `/api/projects/${teamId}/tasks/${taskId}/runs/`
142
+ );
143
+ return response.results || [];
181
144
  }
182
145
 
183
- async setTaskBranch(taskId: string, branch: string): Promise<Task> {
146
+ async getTaskRun(taskId: string, runId: string): Promise<TaskRun> {
184
147
  const teamId = await this.getTeamId();
185
- return this.apiRequest<Task>(`/api/projects/${teamId}/tasks/${taskId}/set_branch/`, {
148
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/${runId}/`);
149
+ }
150
+
151
+ async createTaskRun(
152
+ taskId: string,
153
+ payload?: Partial<Omit<TaskRun, 'id' | 'task' | 'team' | 'created_at' | 'updated_at' | 'completed_at'>>
154
+ ): Promise<TaskRun> {
155
+ const teamId = await this.getTeamId();
156
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/`, {
186
157
  method: "POST",
187
- body: JSON.stringify({ branch }),
158
+ body: JSON.stringify(payload || {}),
188
159
  });
189
160
  }
190
161
 
191
- async attachTaskPullRequest(taskId: string, prUrl: string, branch?: string): Promise<Task> {
162
+ async updateTaskRun(
163
+ taskId: string,
164
+ runId: string,
165
+ payload: TaskRunUpdate
166
+ ): Promise<TaskRun> {
192
167
  const teamId = await this.getTeamId();
193
- const payload: Record<string, string> = { pr_url: prUrl };
194
- if (branch) {
195
- payload.branch = branch;
196
- }
197
- return this.apiRequest<Task>(`/api/projects/${teamId}/tasks/${taskId}/attach_pr/`, {
198
- method: "POST",
168
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/${runId}/`, {
169
+ method: "PATCH",
199
170
  body: JSON.stringify(payload),
200
171
  });
201
172
  }
202
173
 
203
- async getTaskProgress(taskId: string): Promise<TaskProgressResponse> {
174
+ async updateTaskRunStage(taskId: string, runId: string, stageId: string): Promise<TaskRun> {
204
175
  const teamId = await this.getTeamId();
205
- return this.apiRequest<TaskProgressResponse>(`/api/projects/${teamId}/tasks/${taskId}/progress/`);
176
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/${runId}/update_stage/`, {
177
+ method: 'PATCH',
178
+ body: JSON.stringify({ current_stage: stageId }),
179
+ });
206
180
  }
207
181
 
208
- async createTaskProgress(
209
- taskId: string,
210
- payload: TaskProgressUpdate & { status: TaskProgressRecord["status"] }
211
- ): Promise<TaskProgressRecord> {
182
+ async progressTaskRun(taskId: string, runId: string, nextStageId?: string): Promise<TaskRun> {
212
183
  const teamId = await this.getTeamId();
213
- return this.apiRequest<TaskProgressRecord>(`/api/projects/${teamId}/task_progress/`, {
214
- method: "POST",
215
- body: JSON.stringify({
216
- ...payload,
217
- task: taskId,
218
- }),
184
+ const payload: Record<string, string> = {};
185
+ if (nextStageId) {
186
+ payload.next_stage_id = nextStageId;
187
+ }
188
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/${runId}/progress_run/`, {
189
+ method: 'POST',
190
+ body: JSON.stringify(payload),
219
191
  });
220
192
  }
221
193
 
222
- async updateTaskProgress(
223
- taskId: string,
224
- progressId: string,
225
- payload: TaskProgressUpdate
226
- ): Promise<TaskProgressRecord> {
194
+ async setTaskRunOutput(taskId: string, runId: string, output: Record<string, unknown>): Promise<TaskRun> {
227
195
  const teamId = await this.getTeamId();
228
- return this.apiRequest<TaskProgressRecord>(`/api/projects/${teamId}/task_progress/${progressId}/`, {
229
- method: "PATCH",
230
- body: JSON.stringify({
231
- ...payload,
232
- task: taskId,
233
- }),
196
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/${runId}/set_output/`, {
197
+ method: 'PATCH',
198
+ body: JSON.stringify({ output }),
199
+ });
200
+ }
201
+
202
+ async appendTaskRunLog(taskId: string, runId: string, entries: LogEntry[]): Promise<TaskRun> {
203
+ const teamId = await this.getTeamId();
204
+ return this.apiRequest<TaskRun>(`/api/projects/${teamId}/tasks/${taskId}/runs/${runId}/append_log/`, {
205
+ method: 'POST',
206
+ body: JSON.stringify({ entries }),
234
207
  });
235
208
  }
236
209
 
@@ -251,14 +224,6 @@ export class PostHogAPIClient {
251
224
  return this.apiRequest<AgentDefinition[]>(`/api/agents/`);
252
225
  }
253
226
 
254
- async progressTask(taskId: string, options?: { next_stage_id?: string; auto?: boolean }): Promise<Task> {
255
- const teamId = await this.getTeamId();
256
- return this.apiRequest<Task>(`/api/projects/${teamId}/tasks/${taskId}/progress_task/`, {
257
- method: 'POST',
258
- body: JSON.stringify(options || {}),
259
- });
260
- }
261
-
262
227
  /**
263
228
  * Fetch error details from PostHog error tracking
264
229
  */
@@ -206,6 +206,59 @@ export class PromptBuilder {
206
206
  return { description: processedDescription, referencedFiles };
207
207
  }
208
208
 
209
/**
 * Assemble the user prompt for the research stage of a task.
 *
 * The prompt is built from, in order: the task title and processed description,
 * the repository (when the task carries `primary_repository`), files referenced
 * in the description, resources referenced by URL, supporting task files of
 * type `context` or `reference`, and a closing instruction asking for 3-5
 * clarifying questions.
 *
 * A failure while loading supporting task files is logged at debug level and
 * otherwise ignored.
 *
 * @param task           Task to build the prompt for.
 * @param repositoryPath Forwarded to `processFileReferences`.
 * @returns the complete prompt text.
 */
async buildResearchPrompt(task: Task, repositoryPath?: string): Promise<string> {
  // Expand file references first, then URL references on the resulting text.
  const fileStage = await this.processFileReferences(task.description, repositoryPath);
  const urlStage = await this.processUrlReferences(fileStage.description);
  const { referencedFiles } = fileStage;
  const { referencedResources } = urlStage;

  const sections: string[] = [
    `## Current Task\n\n**Task**: ${task.title}\n**Description**: ${urlStage.description}`,
  ];

  if ((task as any).primary_repository) {
    sections.push(`\n**Repository**: ${(task as any).primary_repository}`);
  }

  // Files pulled in through @ mentions
  if (referencedFiles.length > 0) {
    sections.push(`\n\n## Referenced Files\n\n`);
    for (const referenced of referencedFiles) {
      sections.push(`### ${referenced.path}\n\`\`\`\n${referenced.content}\n\`\`\`\n\n`);
    }
  }

  // Resources pulled in through URL mentions
  if (referencedResources.length > 0) {
    sections.push(`\n\n## Referenced Resources\n\n`);
    for (const linked of referencedResources) {
      sections.push(`### ${linked.title} (${linked.type})\n**URL**: ${linked.url}\n\n${linked.content}\n\n`);
    }
  }

  try {
    const taskFiles = await this.getTaskFiles(task.id);
    const supporting = taskFiles.filter((f: any) => f.type === 'context' || f.type === 'reference');
    if (supporting.length > 0) {
      sections.push(`\n\n## Supporting Files`);
      for (const entry of supporting) {
        sections.push(`\n\n### ${entry.name} (${entry.type})\n${entry.content}`);
      }
    }
  } catch (error) {
    this.logger.debug('No existing task files found for research', { taskId: task.id });
  }

  sections.push(`\n\nPlease explore the codebase thoroughly and generate 3-5 clarifying questions that will help guide the implementation of this task. Use the \`create_plan\` tool to create a research.md artifact with your questions in the markdown format specified in your system prompt.`);

  return sections.join('');
}
261
+
209
262
  async buildPlanningPrompt(task: Task, repositoryPath?: string): Promise<string> {
210
263
  // Process file references in description
211
264
  const { description: descriptionAfterFiles, referencedFiles } = await this.processFileReferences(
@@ -4,6 +4,7 @@ import { ClaudeAdapter } from './adapters/claude/claude-adapter.js';
4
4
  import { AgentRegistry } from './agent-registry.js';
5
5
  import type { AgentEvent, Task, McpServerConfig } from './types.js';
6
6
  import type { WorkflowStage, WorkflowStageExecutionResult, WorkflowExecutionOptions } from './workflow-types.js';
7
+ import { RESEARCH_SYSTEM_PROMPT } from './agents/research.js';
7
8
  import { PLANNING_SYSTEM_PROMPT } from './agents/planning.js';
8
9
  import { EXECUTION_SYSTEM_PROMPT } from './agents/execution.js';
9
10
  import { PromptBuilder } from './prompt-builder.js';
@@ -57,6 +58,8 @@ export class StageExecutor {
57
58
  const cwd = options.repositoryPath || process.cwd();
58
59
 
59
60
  switch (agent.agent_type) {
61
+ case 'research':
62
+ return this.runResearch(task, cwd, options, stage.key);
60
63
  case 'planning':
61
64
  return this.runPlanning(task, cwd, options, stage.key);
62
65
  case 'execution':
@@ -70,6 +73,53 @@ export class StageExecutor {
70
73
  }
71
74
  }
72
75
 
76
/**
 * Run the research agent for a task and collect the text it produces.
 *
 * The research system prompt is prepended to the task prompt and sent through
 * `query`. Query options start from fixed defaults and are overridden first by
 * `options.queryOverrides`, then by the overrides of the current stage (looked
 * up under `stageKey`, falling back to `'research'`).
 *
 * Every SDK message is forwarded to the event handler twice: once as a raw SDK
 * event and once in transformed form, when the adapter produces one.
 *
 * @returns the concatenated assistant text, trimmed, in the `plan` field.
 */
private async runResearch(task: Task, cwd: string, options: WorkflowExecutionOptions, stageKey: string): Promise<WorkflowStageExecutionResult> {
  const contextPrompt = await this.promptBuilder.buildResearchPrompt(task, cwd);
  const prompt = `${RESEARCH_SYSTEM_PROMPT}\n\n${contextPrompt}`;

  const stageOverrides = options.stageOverrides?.[stageKey] || options.stageOverrides?.['research'];

  // Later spreads win: defaults < global query overrides < stage query overrides.
  const queryOptions: Record<string, any> = {
    model: 'claude-sonnet-4-5-20250929',
    cwd,
    permissionMode: 'plan',
    settingSources: ['local'],
    mcpServers: this.mcpServers,
    ...(options.queryOverrides || {}),
    ...(stageOverrides?.queryOverrides || {}),
  };

  const response = query({ prompt, options: queryOptions });

  const textChunks: string[] = [];
  for await (const message of response) {
    // Raw SDK event goes out first...
    this.eventHandler?.(this.adapter.createRawSDKEvent(message));

    // ...followed by the transformed event, if there is one.
    const transformed = this.adapter.transform(message);
    if (transformed) {
      if (transformed.type !== 'token') {
        this.logger.debug('Research event', { type: transformed.type });
      }
      this.eventHandler?.(transformed);
    }

    if (message.type === 'assistant' && message.message?.content) {
      for (const part of message.message.content) {
        if (part.type === 'text' && part.text) {
          textChunks.push(part.text);
        }
      }
    }
  }

  // Returned under 'plan' to fit the existing result interface.
  return { plan: textChunks.join('\n').trim() };
}
122
+
73
123
  private async runPlanning(task: Task, cwd: string, options: WorkflowExecutionOptions, stageKey: string): Promise<WorkflowStageExecutionResult> {
74
124
  const contextPrompt = await this.promptBuilder.buildPlanningPrompt(task, cwd);
75
125
  let prompt = PLANNING_SYSTEM_PROMPT + '\n\n' + contextPrompt;
@@ -0,0 +1,167 @@
1
+ import OpenAI from 'openai';
2
+ import { Logger } from './utils/logger.js';
3
+
4
/** A question pulled out of research markdown, with its answer options. */
export interface ExtractedQuestion {
  id: string;
  question: string;
  options: Array<string>;
}

/** An extracted question plus the answer the model recommends and why. */
export interface ExtractedQuestionWithAnswer extends ExtractedQuestion {
  /** The extraction prompt asks for the option letter (a, b, c, ...). */
  recommendedAnswer: string;
  /** Brief reasoning for the recommendation. */
  justification: string;
}
14
+
15
/**
 * JSON Schema for a response of the form `{ questions: [{ id, question, options }] }`.
 * Every listed property is required and extra properties are rejected at both levels.
 */
const questionsOnlySchema = (() => {
  // Fresh object per use so no two schema nodes share a reference.
  const stringField = () => ({ type: 'string' });

  const questionItem = {
    type: 'object',
    properties: {
      id: stringField(),
      question: stringField(),
      options: { type: 'array', items: stringField() },
    },
    required: ['id', 'question', 'options'],
    additionalProperties: false,
  };

  return {
    type: 'object',
    properties: {
      questions: { type: 'array', items: questionItem },
    },
    required: ['questions'],
    additionalProperties: false,
  };
})();
38
+
39
/**
 * JSON Schema for a response of the form
 * `{ questions: [{ id, question, options, recommendedAnswer, justification }] }`.
 * Every listed property is required and extra properties are rejected at both levels.
 */
const questionsWithAnswersSchema = (() => {
  // Fresh object per use so no two schema nodes share a reference.
  const stringField = () => ({ type: 'string' });

  const answeredQuestionItem = {
    type: 'object',
    properties: {
      id: stringField(),
      question: stringField(),
      options: { type: 'array', items: stringField() },
      recommendedAnswer: stringField(),
      justification: stringField(),
    },
    required: ['id', 'question', 'options', 'recommendedAnswer', 'justification'],
    additionalProperties: false,
  };

  return {
    type: 'object',
    properties: {
      questions: { type: 'array', items: answeredQuestionItem },
    },
    required: ['questions'],
    additionalProperties: false,
  };
})();
64
+
65
/** Contract for turning research markdown into structured question data. */
export interface StructuredExtractor {
  /** Extract the questions contained in `researchContent`. */
  extractQuestions(researchContent: string): Promise<ExtractedQuestion[]>;

  /** Extract the questions together with a recommended answer and justification for each. */
  extractQuestionsWithAnswers(researchContent: string): Promise<ExtractedQuestionWithAnswer[]>;
}
69
+
70
/**
 * `StructuredExtractor` backed by the OpenAI chat completions API.
 *
 * Both extraction methods send the research markdown to `gpt-4o-mini` with a
 * strict JSON-schema response format and return the `questions` array of the
 * parsed reply.
 */
export class OpenAIExtractor implements StructuredExtractor {
  private client: OpenAI;
  private logger: Logger;

  /**
   * @param logger Optional logger; a non-debug logger prefixed `[OpenAIExtractor]` is created when omitted.
   * @throws Error when the `OPENAI_API_KEY` environment variable is not set.
   */
  constructor(logger?: Logger) {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error('OPENAI_API_KEY environment variable is required for structured extraction');
    }

    this.client = new OpenAI({ apiKey });
    this.logger = logger || new Logger({ debug: false, prefix: '[OpenAIExtractor]' });
  }

  /**
   * Extract the questions contained in the research markdown.
   *
   * @throws Error when the response has no content or no `questions` array;
   *         JSON parse errors and API errors propagate unchanged.
   */
  async extractQuestions(researchContent: string): Promise<ExtractedQuestion[]> {
    this.logger.debug('Extracting questions from research content', {
      contentLength: researchContent.length,
    });

    const questions = await this.requestQuestions<ExtractedQuestion>(
      researchContent,
      'Extract the research questions from the provided markdown. Return a JSON object matching the schema.',
      'questions',
      questionsOnlySchema,
    );

    this.logger.info('Successfully extracted questions', {
      questionCount: questions.length,
    });

    return questions;
  }

  /**
   * Extract the questions and, for each, a recommended answer with a justification.
   *
   * @throws Error when the response has no content or no `questions` array;
   *         JSON parse errors and API errors propagate unchanged.
   */
  async extractQuestionsWithAnswers(
    researchContent: string,
  ): Promise<ExtractedQuestionWithAnswer[]> {
    this.logger.debug('Extracting questions with recommended answers', {
      contentLength: researchContent.length,
    });

    const questions = await this.requestQuestions<ExtractedQuestionWithAnswer>(
      researchContent,
      'Extract the research questions from the markdown and provide recommended answers based on the analysis. For each question, include a recommendedAnswer (the letter: a, b, c, etc.) and a brief justification. Return a JSON object matching the schema.',
      'questions_with_answers',
      questionsWithAnswersSchema,
    );

    this.logger.info('Successfully extracted questions with answers', {
      questionCount: questions.length,
    });

    return questions;
  }

  /** Send one schema-constrained completion request and return the parsed `questions` array. */
  private async requestQuestions<T extends ExtractedQuestion>(
    researchContent: string,
    systemPrompt: string,
    schemaName: string,
    schema: Record<string, unknown>,
  ): Promise<T[]> {
    const completion = await this.client.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: researchContent },
      ],
      response_format: {
        type: 'json_schema',
        json_schema: {
          name: schemaName,
          strict: true,
          schema,
        },
      },
    });

    // `choices` can be empty; guard the lookup so the caller gets the
    // descriptive error below instead of a TypeError on `undefined.message`.
    const content = completion.choices[0]?.message?.content;
    if (!content) {
      throw new Error('No content in OpenAI response');
    }

    // Do not trust the payload shape blindly, even with a strict schema requested.
    const parsed: unknown = JSON.parse(content);
    const questions = (parsed as { questions?: unknown } | null)?.questions;
    if (!Array.isArray(questions)) {
      throw new Error('OpenAI response does not contain a questions array');
    }

    return questions as T[];
  }
}