@planningo/duul 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,9 @@
1
+ import { randomUUID } from 'node:crypto';
1
2
  import OpenAI from 'openai';
2
3
  import { zodTextFormat } from 'openai/helpers/zod';
3
4
  import { validateProjectRoot } from '../filesystem.js';
4
- import { executeFilesystemTool } from '../filesystem-tools.js';
5
+ import { CHATGPT_BASE_URL } from './codex-auth.js';
6
+ import { executeFilesystemTool, createReviewerByteBudget } from '../filesystem-tools.js';
5
7
  import { estimateCost } from '../pricing.js';
6
8
  const MAX_INPUT_CHARS = 400_000;
7
9
  const MAX_TOOL_ROUNDS = 10;
@@ -166,31 +168,57 @@ function validateInputLength(systemPrompt, userMessage) {
166
168
  }
167
169
  export class OpenAIProvider {
168
170
  name = 'openai';
169
- capabilities = {
170
- structuredOutputs: true,
171
- toolCalling: true,
172
- previousResponseId: true,
173
- jsonSchemaStrict: true,
174
- };
171
+ capabilities;
175
172
  client;
176
173
  model;
177
174
  temperature;
178
175
  topP;
176
+ /**
177
+ * ChatGPT-backend mode. The endpoint is stateless (`store: false`): it does
178
+ * not support `previous_response_id`, `temperature`/`top_p`, or
179
+ * `max_output_tokens`, and it streams. We resend the full input each turn.
180
+ */
181
+ stateless;
182
+ baseURL;
183
+ defaultHeaders;
184
+ refresh;
185
+ reasoningEffort;
179
186
  constructor(config) {
180
- const apiKey = config?.apiKey ?? process.env.OPENAI_API_KEY;
187
+ const chatgpt = config?.chatgpt;
188
+ this.stateless = !!chatgpt;
189
+ this.refresh = chatgpt?.refresh;
190
+ this.reasoningEffort = process.env.DUUL_REASONING_EFFORT ?? 'medium';
191
+ const apiKey = chatgpt?.accessToken ?? config?.apiKey ?? process.env.OPENAI_API_KEY;
181
192
  if (!apiKey) {
182
- throw new Error('OPENAI_API_KEY environment variable is not set');
193
+ throw new Error('No OpenAI credential found. Set OPENAI_API_KEY, or sign in with the Codex CLI (`codex login`).');
183
194
  }
184
- this.client = new OpenAI({
185
- apiKey,
186
- ...(config?.baseUrl ? { baseURL: config.baseUrl } : {}),
187
- });
195
+ this.baseURL = chatgpt ? CHATGPT_BASE_URL : config?.baseUrl;
196
+ this.defaultHeaders = chatgpt
197
+ ? { 'chatgpt-account-id': chatgpt.accountId, originator: 'codex_cli_rs', 'session-id': randomUUID() }
198
+ : undefined;
199
+ this.client = this.buildClient(apiKey);
188
200
  this.model = config?.model ?? process.env.REVIEW_MODEL ?? 'gpt-5.4';
189
201
  this.temperature = config?.temperature ?? 0.2;
190
202
  this.topP = config?.topP ?? 0.1;
203
+ this.capabilities = {
204
+ structuredOutputs: true,
205
+ toolCalling: true,
206
+ // Native server-side chaining is available only in api-key mode. The
207
+ // ChatGPT backend is stateless, so continuity there comes from turn replay.
208
+ previousResponseId: !this.stateless,
209
+ conversationReplay: this.stateless,
210
+ jsonSchemaStrict: true,
211
+ };
212
+ }
213
+ buildClient(apiKey) {
214
+ return new OpenAI({
215
+ apiKey,
216
+ ...(this.baseURL ? { baseURL: this.baseURL } : {}),
217
+ ...(this.defaultHeaders ? { defaultHeaders: this.defaultHeaders } : {}),
218
+ });
191
219
  }
192
220
  async review(options) {
193
- const { systemPrompt, userMessage, schemaName, outputSchema, workspaceScope, previousReviewId } = options;
221
+ const { systemPrompt, userMessage, schemaName, outputSchema, workspaceScope, previousReviewId, conversationHistory } = options;
194
222
  validateInputLength(systemPrompt, userMessage);
195
223
  const effectiveRoot = workspaceScope?.root ?? null;
196
224
  if (effectiveRoot && !workspaceScope) {
@@ -225,19 +253,49 @@ export class OpenAIProvider {
225
253
  const baseParams = {
226
254
  model: this.model,
227
255
  instructions: systemPrompt,
228
- temperature: this.temperature,
229
- top_p: this.topP,
230
- max_output_tokens: 16384,
231
256
  text: { format: zodTextFormat(outputSchema, schemaName) },
232
257
  ...(tools ? { tools } : {}),
258
+ ...(this.stateless
259
+ ? {
260
+ // ChatGPT backend: stateless, reasoning-only sampling, encrypted
261
+ // reasoning must be echoed back on each turn (store: false).
262
+ store: false,
263
+ reasoning: { effort: this.reasoningEffort },
264
+ include: ['reasoning.encrypted_content'],
265
+ }
266
+ : {
267
+ temperature: this.temperature,
268
+ top_p: this.topP,
269
+ max_output_tokens: 16384,
270
+ }),
233
271
  };
234
- let response = await this.apiCallWithRetry({
235
- ...baseParams,
236
- input: [{ role: 'user', content: [{ type: 'input_text', text: userMessage }] }],
237
- ...(previousReviewId ? { previous_response_id: previousReviewId } : {}),
238
- });
272
+ // Stateless (ChatGPT backend): accumulate the full input across tool rounds
273
+ // since there is no server-side `previous_response_id` chaining. Prior rounds
274
+ // are replayed as message items (user: input_text, assistant: output_text).
275
+ const inputItems = [];
276
+ if (this.stateless && conversationHistory?.length) {
277
+ inputItems.push(...conversationHistory);
278
+ }
279
+ inputItems.push({ role: 'user', content: [{ type: 'input_text', text: userMessage }] });
280
+ let response = this.stateless
281
+ ? await this.apiCallWithRetry({ ...baseParams, input: inputItems })
282
+ : await this.apiCallWithRetry({
283
+ ...baseParams,
284
+ input: inputItems,
285
+ ...(previousReviewId ? { previous_response_id: previousReviewId } : {}),
286
+ });
239
287
  accumulateUsage(response);
240
288
  console.error(`[duul] response.id=${response.id} model=${this.model} provider=openai`);
289
+ // Continue the conversation after a tool round. Stateless mode resends the
290
+ // whole input (prior assistant output items + the new tool outputs); chained
291
+ // mode uses server-side previous_response_id and sends only the new items.
292
+ const continueConversation = async (newItems) => {
293
+ if (this.stateless) {
294
+ inputItems.push(...response.output, ...newItems);
295
+ return this.apiCallWithRetry({ ...baseParams, input: inputItems });
296
+ }
297
+ return this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: newItems });
298
+ };
241
299
  // Agentic tool-calling loop
242
300
  if (effectiveRoot) {
243
301
  const toolReadBudget = MAX_INPUT_CHARS - (systemPrompt.length + userMessage.length);
@@ -274,6 +332,7 @@ export class OpenAIProvider {
274
332
  };
275
333
  const toolCache = new Map();
276
334
  const callCounts = new Map();
335
+ const byteBudget = createReviewerByteBudget();
277
336
  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
278
337
  const functionCalls = this.getFunctionCalls(response);
279
338
  if (functionCalls.length === 0)
@@ -302,14 +361,14 @@ export class OpenAIProvider {
302
361
  toolResults.push({ type: 'function_call_output', call_id: call.call_id, output: budgetMessage(call.name, currentLevel) });
303
362
  continue;
304
363
  }
305
- const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope);
364
+ const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope, byteBudget);
306
365
  toolCache.set(cacheKey, result);
307
366
  allUsedTools.push(`${call.name}(${argSummary})`);
308
367
  accumulatedToolChars += result.length;
309
368
  console.error(`[duul] ${call.name}(${argSummary}) -> ${result.length} chars (total: ${accumulatedToolChars}/${toolReadBudget}, level ${getStrategyLevel()})`);
310
369
  toolResults.push({ type: 'function_call_output', call_id: call.call_id, output: result });
311
370
  }
312
- response = await this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: toolResults });
371
+ response = await continueConversation(toolResults);
313
372
  accumulateUsage(response);
314
373
  console.error(`[duul] response.id=${response.id} (after tool round ${round + 1})`);
315
374
  if (getStrategyLevel() >= 3 && this.hasPendingFunctionCalls(response)) {
@@ -317,7 +376,7 @@ export class OpenAIProvider {
317
376
  type: 'function_call_output', call_id: c.call_id,
318
377
  output: 'No more file reads allowed. You must produce your final review verdict now.',
319
378
  }));
320
- response = await this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: stopResults });
379
+ response = await continueConversation(stopResults);
321
380
  accumulateUsage(response);
322
381
  break;
323
382
  }
@@ -329,7 +388,7 @@ export class OpenAIProvider {
329
388
  type: 'function_call_output', call_id: c.call_id,
330
389
  output: 'Tool call limit reached. You must produce your final review verdict now.',
331
390
  }));
332
- response = await this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: stopResults });
391
+ response = await continueConversation(stopResults);
333
392
  accumulateUsage(response);
334
393
  }
335
394
  }
@@ -337,31 +396,68 @@ export class OpenAIProvider {
337
396
  const costStr = usage.estimated_cost_usd !== null ? ` (~$${usage.estimated_cost_usd.toFixed(4)})` : '';
338
397
  const cachedStr = usage.cached_input_tokens ? ` [cached: ${usage.cached_input_tokens}]` : '';
339
398
  console.error(`[duul] Token usage: ${usage.input_tokens} in + ${usage.output_tokens} out = ${usage.total_tokens} total (${usage.api_calls} API calls)${cachedStr}${costStr}`);
399
+ // Stateless mode: record this round's user/assistant turns so the reviewer
400
+ // can replay them next round (the ChatGPT backend has no native chaining).
401
+ // Only the final Q&A is kept — replaying every tool call would bloat tokens
402
+ // and risks stale encrypted-reasoning items across separate responses.
403
+ const buildTurns = (assistantText) => this.stateless
404
+ ? [
405
+ ...(conversationHistory ?? []),
406
+ { role: 'user', content: [{ type: 'input_text', text: userMessage }] },
407
+ { role: 'assistant', content: [{ type: 'output_text', text: assistantText }] },
408
+ ]
409
+ : undefined;
340
410
  // Extract structured output
411
+ const outputText = this.getOutputText(response);
341
412
  const parsed = this.extractStructuredOutput(response, outputSchema);
342
413
  if (parsed !== null) {
343
- return { parsed, reviewId: response.id, usage };
414
+ return { parsed, reviewId: response.id, usage, conversationTurns: buildTurns(outputText ?? '') };
344
415
  }
345
416
  if (options.createFallback) {
346
417
  const reason = this.hasPendingFunctionCalls(response) ? 'round_limit' : 'budget';
347
418
  const fallback = options.createFallback(reason, allUsedTools);
348
419
  console.error(`[duul] Returning structured fallback (reason: ${reason}).`);
349
- return { parsed: fallback, reviewId: response.id, usage };
420
+ return { parsed: fallback, reviewId: response.id, usage, conversationTurns: buildTurns(outputText ?? JSON.stringify(fallback)) };
350
421
  }
351
422
  throw new Error('Review failed: could not obtain structured verdict after tool loop.');
352
423
  }
353
424
  async apiCallWithRetry(params) {
425
+ let refreshedOnce = false;
354
426
  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
355
427
  const controller = new AbortController();
356
428
  const timeout = setTimeout(() => controller.abort(), 120_000);
357
429
  try {
358
- const response = await this.client.responses.create({ ...params, stream: false }, { signal: controller.signal });
430
+ let response;
431
+ if (this.stateless) {
432
+ // ChatGPT backend requires streaming and leaves `response.completed`'s
433
+ // `output` empty — aggregate items from the streamed events instead.
434
+ const stream = this.client.responses.stream(params, { signal: controller.signal });
435
+ response = await this.aggregateStream(stream);
436
+ }
437
+ else {
438
+ response = (await this.client.responses.create({ ...params, stream: false }, { signal: controller.signal }));
439
+ }
359
440
  clearTimeout(timeout);
360
441
  return response;
361
442
  }
362
443
  catch (error) {
363
444
  clearTimeout(timeout);
364
- const isRetryable = error instanceof Error && ('status' in error ? (error.status === 429 || error.status >= 500) : error.name === 'AbortError');
445
+ const status = error instanceof Error && 'status' in error ? error.status : undefined;
446
+ // ChatGPT token expired mid-review: refresh once and retry immediately.
447
+ if (status === 401 && this.refresh && !refreshedOnce) {
448
+ refreshedOnce = true;
449
+ try {
450
+ const token = await this.refresh();
451
+ this.client = this.buildClient(token);
452
+ console.error('[duul] Refreshed Codex token after 401, retrying');
453
+ attempt--; // don't consume a retry for the refresh
454
+ continue;
455
+ }
456
+ catch (refreshError) {
457
+ console.error(`[duul] Codex token refresh failed: ${refreshError instanceof Error ? refreshError.message : refreshError}`);
458
+ }
459
+ }
460
+ const isRetryable = error instanceof Error && (status !== undefined ? (status === 429 || status >= 500) : error.name === 'AbortError');
365
461
  if (isRetryable && attempt < MAX_RETRIES - 1) {
366
462
  const delay = 1000 * Math.pow(2, attempt);
367
463
  console.error(`[duul] Retry ${attempt + 1}/${MAX_RETRIES} after ${delay}ms`);
@@ -373,6 +469,53 @@ export class OpenAIProvider {
373
469
  }
374
470
  throw new Error('Unreachable: exhausted retries');
375
471
  }
472
+ /**
473
+ * Aggregate a streamed Responses call into a Response object.
474
+ *
475
+ * The ChatGPT backend delivers completed output items via
476
+ * `response.output_item.done` events and returns an EMPTY `output` array on
477
+ * `response.completed`, so we collect items from the stream ourselves. Usage
478
+ * and id come from `response.completed` (falling back to `response.created`).
479
+ */
480
+ async aggregateStream(stream) {
481
+ const output = [];
482
+ let id = '';
483
+ let usage;
484
+ for await (const event of stream) {
485
+ switch (event.type) {
486
+ case 'response.created':
487
+ id = event.response.id;
488
+ break;
489
+ case 'response.output_item.done':
490
+ output.push(event.item);
491
+ break;
492
+ case 'response.completed':
493
+ id = event.response.id ?? id;
494
+ usage = event.response.usage;
495
+ break;
496
+ case 'response.failed':
497
+ throw new Error(`ChatGPT backend response failed: ${event.response.error?.message ?? 'unknown error'}`);
498
+ case 'error':
499
+ throw new Error(`ChatGPT backend stream error: ${event.message ?? 'unknown error'}`);
500
+ default:
501
+ break;
502
+ }
503
+ }
504
+ return { id, output, usage };
505
+ }
506
+ /** Return the first output_text string in the response, or null. */
507
+ getOutputText(response) {
508
+ for (const item of response.output) {
509
+ if (item.type === 'message' && 'content' in item) {
510
+ const msg = item;
511
+ for (const content of msg.content) {
512
+ if (content.type === 'output_text' && content.text)
513
+ return content.text;
514
+ }
515
+ }
516
+ }
517
+ return null;
518
+ }
376
519
  extractStructuredOutput(response, outputSchema) {
377
520
  for (const item of response.output) {
378
521
  if (item.type === 'message' && 'content' in item) {
@@ -56,8 +56,14 @@ export interface ProviderCapabilities {
56
56
  structuredOutputs: boolean;
57
57
  /** Supports tool/function calling */
58
58
  toolCalling: boolean;
59
- /** Supports previous_response_id for conversation continuity */
59
+ /** Supports NATIVE server-side conversation chaining via previous_response_id */
60
60
  previousResponseId: boolean;
61
+ /**
62
+ * Continuity is achieved by replaying prior turns (conversationHistory) rather
63
+ * than native server-side chaining. When true, the reviewer stores/loads
64
+ * conversation turns per reviewId and passes them back on the next round.
65
+ */
66
+ conversationReplay: boolean;
61
67
  /** Supports strict JSON schema mode */
62
68
  jsonSchemaStrict: boolean;
63
69
  }
@@ -34,3 +34,11 @@ export declare function computeIterationMeta(phase: ReviewPhase, callerIteration
34
34
  * (last allowed iteration).
35
35
  */
36
36
  export declare function isIterationLimitExceeded(phase: ReviewPhase, callerIterationCount?: number, requestMaxOverride?: number): boolean;
37
+ /**
38
+ * Emit a soft cost warning once iteration_count crosses ~60% of the limit.
39
+ * Uses the current round's estimated cost as a rough per-round figure so the
40
+ * orchestrator can decide whether to accept a near-verdict or escalate.
41
+ *
42
+ * Returns null when below the threshold, or when iteration_count is 0.
43
+ */
44
+ export declare function computeCostWarning(iterMeta: IterationMeta, estimatedCostUsd: number | null): string | null;
@@ -63,3 +63,24 @@ export function isIterationLimitExceeded(phase, callerIterationCount, requestMax
63
63
  const limit = getIterationLimit(phase, requestMaxOverride);
64
64
  return callerIterationCount > limit;
65
65
  }
66
/** Fraction of the iteration limit at which the soft cost warning starts. */
const COST_WARNING_RATIO = 0.6;
/**
 * Emit a soft cost warning once iteration_count reaches ~60% of the limit
 * (`ceil(iteration_limit * COST_WARNING_RATIO)`).
 * Uses the current round's estimated cost as a rough per-round figure so the
 * orchestrator can decide whether to accept a near-verdict or escalate.
 *
 * @param {{iteration_count: number, iteration_limit: number}} iterMeta
 * @param {number | null} estimatedCostUsd - Cost of the current round. Anything
 *   that is not a finite positive number is reported as "an unknown amount".
 * @returns {string | null} The warning text, or null when iteration_count is
 *   not positive or is still below the threshold.
 */
export function computeCostWarning(iterMeta, estimatedCostUsd) {
  const { iteration_count: count, iteration_limit: limit } = iterMeta;
  if (count <= 0) {
    return null;
  }
  const trigger = Math.ceil(limit * COST_WARNING_RATIO);
  if (count < trigger) {
    return null;
  }
  // Number.isFinite rejects null/undefined/strings/NaN/Infinity in one check,
  // so `.toFixed` is only ever called on a real, printable number.
  const hasUsableCost = Number.isFinite(estimatedCostUsd) && estimatedCostUsd > 0;
  const costStr = hasUsableCost
    ? `~$${estimatedCostUsd.toFixed(4)}`
    : 'an unknown amount';
  return (`This is iteration ${count} of ${limit}. ` +
    `Each round costs ${costStr}. ` +
    `Consider accepting REVISE-with-minor-issues or escalating to human.`);
}
@@ -4,8 +4,14 @@
4
4
  */
5
5
  import type { z } from 'zod';
6
6
  import type { WorkspaceScope } from './filesystem.js';
7
- import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage } from './providers/types.js';
7
+ import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage, ConversationTurn } from './providers/types.js';
8
8
  export type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage };
9
+ export type ReviewToolName = 'plan' | 'code' | 'partition';
10
+ type ReviewerModel = string | {
11
+ plan?: string;
12
+ code?: string;
13
+ partition?: string;
14
+ };
9
15
  export interface ReviewOptions<T extends z.ZodType> {
10
16
  systemPrompt: string;
11
17
  userMessage: string;
@@ -13,9 +19,10 @@ export interface ReviewOptions<T extends z.ZodType> {
13
19
  outputSchema: T;
14
20
  workspaceScope?: WorkspaceScope | null;
15
21
  previousReviewId?: string;
22
+ toolName?: ReviewToolName;
16
23
  reviewerConfig?: {
17
24
  provider?: string;
18
- model?: string;
25
+ model?: ReviewerModel;
19
26
  base_url?: string;
20
27
  api_key?: string;
21
28
  temperature?: number;
@@ -23,6 +30,31 @@ export interface ReviewOptions<T extends z.ZodType> {
23
30
  };
24
31
  createFallback?: (reason: ExhaustionReason, usedTools: string[]) => z.infer<T>;
25
32
  }
33
+ /**
34
+ * Resolve a concrete model string from either the flat string form or
35
+ * the per-tool object form. Returns undefined when nothing is set so the
36
+ * provider falls back to env/default.
37
+ */
38
+ export declare function resolveModelForTool(model: ReviewerModel | undefined, toolName: ReviewToolName | undefined): string | undefined;
39
+ /**
40
+ * Decide how to handle cross-round continuity for a provider, given whether the
41
+ * caller supplied a previousReviewId. Pure function so it can be unit-tested.
42
+ *
43
+ * - `shouldLoad`: replay-based providers need prior turns loaded and passed in.
44
+ * - `shouldWarn`: the caller asked for continuity but the provider supports
45
+ * neither native chaining nor replay, so context will be lost.
46
+ */
47
+ export declare function continuityPlan(capabilities: {
48
+ previousResponseId: boolean;
49
+ conversationReplay: boolean;
50
+ }, hasPreviousReviewId: boolean): {
51
+ shouldLoad: boolean;
52
+ shouldWarn: boolean;
53
+ };
54
+ /** Reset the in-memory conversation store. Test-only. */
55
+ export declare function __resetConversationStoreForTest(): void;
56
+ export declare function getConversationHistory(reviewId: string, workspaceRoot?: string): Promise<ConversationTurn[] | undefined>;
57
+ export declare function storeConversation(reviewId: string, turns: ConversationTurn[], workspaceRoot?: string): Promise<void>;
26
58
  /**
27
59
  * Main entry point for all review calls.
28
60
  * Resolves provider from config, delegates the call.
@@ -3,6 +3,21 @@ import { join, dirname } from 'node:path';
3
3
  import { OpenAIProvider } from './providers/openai.js';
4
4
  import { AnthropicProvider } from './providers/anthropic.js';
5
5
  import { GoogleProvider } from './providers/google.js';
6
+ import { resolveCodexCredential } from './providers/codex-auth.js';
7
/**
 * Resolve a concrete model string from either the flat string form or
 * the per-tool object form (`{ plan, code, partition }`).
 *
 * @param {string | {plan?: string, code?: string, partition?: string} | null | undefined} model
 * @param {'plan' | 'code' | 'partition' | undefined} toolName
 * @returns {string | undefined} The model to use, or undefined when nothing is
 *   set so the provider falls back to env/default.
 */
export function resolveModelForTool(model, toolName) {
  // `== null` covers both undefined and null; `typeof null` is 'object', so a
  // null model would otherwise reach the property lookup below and throw.
  if (model == null) {
    return undefined;
  }
  if (typeof model === 'string') {
    return model;
  }
  if (!toolName) {
    return undefined;
  }
  // Normalize an explicit null entry to undefined, matching the "nothing set" contract.
  return model[toolName] ?? undefined;
}
6
21
  /**
7
22
  * Resolve the effective provider name from config and env vars.
8
23
  * Priority: per-request config > env REVIEW_PROVIDER > "openai"
@@ -52,26 +67,50 @@ function apiKeyFingerprint(key) {
52
67
  return key;
53
68
  return `${key.slice(0, 4)}...${key.slice(-4)}`;
54
69
  }
55
/**
 * Build the provider-cache key for a provider/config combination.
 *
 * The key is the JSON serialization of the provider name, the already-resolved
 * model, the request's base URL and sampling settings, and the value returned
 * by `apiKeyFingerprint` for the effective API key (per-request key first,
 * otherwise whatever `resolveApiKey` yields for the provider).
 *
 * @param {string} provider - Provider name.
 * @param {string | undefined} resolvedModel - Model after per-tool resolution.
 * @param {object | undefined} config - Per-request reviewer config, if any.
 * @returns {string} The cache key.
 */
function getProviderCacheKey(provider, resolvedModel, config) {
  const effectiveKey = config?.api_key ?? resolveApiKey(provider);
  const keyParts = {
    provider,
    model: resolvedModel,
    base_url: config?.base_url,
    temperature: config?.temperature,
    top_p: config?.top_p,
    key_fp: apiKeyFingerprint(effectiveKey),
  };
  return JSON.stringify(keyParts);
}
81
/**
 * Resolve the OpenAI credential, falling back to the Codex CLI login when no
 * explicit or env API key is present.
 *
 * Resolution order: per-request key, then `OPENAI_API_KEY`, then whatever
 * `resolveCodexCredential()` returns.
 *
 * @param {string | undefined} configApiKey - Per-request API key, if any.
 * @returns {Promise<{apiKey?: string, chatgpt?: {accessToken: string, accountId: string, refresh: Function}}>}
 *   `{ apiKey }` for key-based auth, `{ chatgpt }` for a Sign in with ChatGPT
 *   bearer credential, or `{}` when nothing was found (the provider then
 *   raises its own "no credential" error).
 */
async function resolveOpenAiCredential(configApiKey) {
  // `||` rather than `??`: an empty-string key means "not provided" (the caller
  // already treats it that way via `!!reviewerConfig?.api_key`), so it must not
  // mask OPENAI_API_KEY and silently divert to the Codex login.
  const explicitKey = configApiKey || process.env.OPENAI_API_KEY;
  if (explicitKey) {
    return { apiKey: explicitKey };
  }
  const cred = await resolveCodexCredential();
  if (!cred) {
    return {}; // let the provider throw its standard "no credential" error
  }
  if (cred.mode === 'apikey') {
    console.error('[duul] Using OpenAI API key from Codex CLI login (~/.codex/auth.json)');
    return { apiKey: cred.apiKey };
  }
  console.error('[duul] Using Sign in with ChatGPT credentials from Codex CLI login');
  const { accessToken, accountId, refresh } = cred;
  return { chatgpt: { accessToken, accountId, refresh } };
}
66
100
  /**
67
101
  * Create or retrieve a cached provider instance.
102
+ *
103
+ * `toolName` lets callers use the per-tool model override form:
104
+ * `{ plan: "...", code: "...", partition: "..." }`. The resolved model
105
+ * participates in the cache key so per-tool models don't collide.
68
106
  */
69
- function getProvider(reviewerConfig) {
107
+ async function getProvider(reviewerConfig, toolName) {
70
108
  const providerName = resolveProviderName(reviewerConfig?.provider);
71
109
  const hasEphemeralKey = !!reviewerConfig?.api_key;
110
+ const resolvedModel = resolveModelForTool(reviewerConfig?.model, toolName);
72
111
  // Per-request api_key → skip cache (ephemeral credential, don't leak into shared cache)
73
112
  if (!hasEphemeralKey) {
74
- const cacheKey = getProviderCacheKey(providerName, reviewerConfig);
113
+ const cacheKey = getProviderCacheKey(providerName, resolvedModel, reviewerConfig);
75
114
  if (providerCache.has(cacheKey)) {
76
115
  return providerCache.get(cacheKey);
77
116
  }
@@ -80,15 +119,21 @@ function getProvider(reviewerConfig) {
80
119
  const constructorConfig = {
81
120
  apiKey,
82
121
  baseUrl: reviewerConfig?.base_url,
83
- model: reviewerConfig?.model,
122
+ model: resolvedModel,
84
123
  temperature: reviewerConfig?.temperature,
85
124
  topP: reviewerConfig?.top_p,
86
125
  };
87
126
  let provider;
127
+ // ChatGPT-login providers hold a rotating bearer token — never cache them.
128
+ let bypassCache = hasEphemeralKey;
88
129
  switch (providerName) {
89
- case 'openai':
90
- provider = new OpenAIProvider(constructorConfig);
130
+ case 'openai': {
131
+ const cred = await resolveOpenAiCredential(reviewerConfig?.api_key);
132
+ if (cred.chatgpt)
133
+ bypassCache = true;
134
+ provider = new OpenAIProvider({ ...constructorConfig, apiKey: cred.apiKey ?? apiKey, chatgpt: cred.chatgpt });
91
135
  break;
136
+ }
92
137
  case 'anthropic':
93
138
  provider = new AnthropicProvider(constructorConfig);
94
139
  break;
@@ -110,18 +155,18 @@ function getProvider(reviewerConfig) {
110
155
  default:
111
156
  throw new Error(`Unknown provider: ${providerName}`);
112
157
  }
113
- // Only cache env-based providers (not ephemeral per-request keys)
114
- if (!hasEphemeralKey) {
158
+ // Only cache stable env-based providers (not ephemeral keys or rotating tokens)
159
+ if (!bypassCache) {
115
160
  // Evict oldest entry if cache is full
116
161
  if (providerCache.size >= MAX_CACHE_SIZE) {
117
162
  const oldestKey = providerCache.keys().next().value;
118
163
  providerCache.delete(oldestKey);
119
164
  console.error(`[duul] Provider cache full, evicted oldest entry`);
120
165
  }
121
- const cacheKey = getProviderCacheKey(providerName, reviewerConfig);
166
+ const cacheKey = getProviderCacheKey(providerName, resolvedModel, reviewerConfig);
122
167
  providerCache.set(cacheKey, provider);
123
168
  }
124
- console.error(`[duul] Created ${providerName} provider (model: ${reviewerConfig?.model ?? 'default'}${hasEphemeralKey ? ', ephemeral key' : ''})`);
169
+ console.error(`[duul] Created ${providerName} provider (model: ${resolvedModel ?? 'default'}${toolName ? `, tool: ${toolName}` : ''}${bypassCache ? ', uncached' : ''})`);
125
170
  return provider;
126
171
  }
127
172
  // --- Conversation history store (disk-persisted per workspace) ---
@@ -142,6 +187,11 @@ function conversationsPath(workspaceRoot) {
142
187
  async function loadFromDisk(workspaceRoot) {
143
188
  if (diskLoaded && lastWorkspaceRoot === workspaceRoot)
144
189
  return;
190
+ // Switching workspaces: drop the previous workspace's entries so they aren't
191
+ // flushed into (or replayed from) the new workspace's conversations file.
192
+ if (lastWorkspaceRoot !== null && lastWorkspaceRoot !== workspaceRoot) {
193
+ memoryCache.clear();
194
+ }
145
195
  lastWorkspaceRoot = workspaceRoot;
146
196
  diskLoaded = true;
147
197
  try {
@@ -189,7 +239,29 @@ function evictOldest() {
189
239
  console.error(`[duul] Conversation store full, evicted oldest entry`);
190
240
  }
191
241
  }
192
- async function getConversationHistory(reviewId, workspaceRoot) {
242
/**
 * Decide how to handle cross-round continuity for a provider, given whether the
 * caller supplied a previousReviewId. Pure function so it can be unit-tested.
 *
 * Capability flags are coerced to booleans, so a provider that does not declare
 * `conversationReplay` at all is treated as not supporting replay and the
 * result always holds real booleans.
 *
 * @param {{previousResponseId?: boolean, conversationReplay?: boolean}} capabilities
 * @param {boolean} hasPreviousReviewId - Whether the caller asked for continuity.
 * @returns {{shouldLoad: boolean, shouldWarn: boolean}}
 *   - `shouldLoad`: replay-based providers need prior turns loaded and passed in.
 *   - `shouldWarn`: the caller asked for continuity but the provider supports
 *     neither native chaining nor replay, so context will be lost.
 */
export function continuityPlan(capabilities, hasPreviousReviewId) {
  if (!hasPreviousReviewId) {
    return { shouldLoad: false, shouldWarn: false };
  }
  const canReplay = Boolean(capabilities.conversationReplay);
  const canChain = Boolean(capabilities.previousResponseId);
  return {
    shouldLoad: canReplay,
    shouldWarn: !canChain && !canReplay,
  };
}
258
/**
 * Test-only helper: return the conversation store to its initial state by
 * forgetting the remembered workspace, clearing the "loaded from disk" flag,
 * and emptying the in-memory cache.
 */
export function __resetConversationStoreForTest() {
  lastWorkspaceRoot = null;
  diskLoaded = false;
  memoryCache.clear();
}
264
+ export async function getConversationHistory(reviewId, workspaceRoot) {
193
265
  if (workspaceRoot)
194
266
  await loadFromDisk(workspaceRoot);
195
267
  const entry = memoryCache.get(reviewId);
@@ -198,7 +270,7 @@ async function getConversationHistory(reviewId, workspaceRoot) {
198
270
  entry.lastAccessed = Date.now();
199
271
  return entry.turns;
200
272
  }
201
- async function storeConversation(reviewId, turns, workspaceRoot) {
273
+ export async function storeConversation(reviewId, turns, workspaceRoot) {
202
274
  evictOldest();
203
275
  memoryCache.set(reviewId, { turns, lastAccessed: Date.now() });
204
276
  if (workspaceRoot) {
@@ -210,21 +282,23 @@ async function storeConversation(reviewId, turns, workspaceRoot) {
210
282
  * Resolves provider from config, delegates the call.
211
283
  */
212
284
  export async function callReview(options) {
213
- const provider = getProvider(options.reviewerConfig);
285
+ const provider = await getProvider(options.reviewerConfig, options.toolName);
214
286
  // Log capability warnings for non-full-featured providers
215
287
  if (!provider.capabilities.toolCalling && options.workspaceScope?.root) {
216
288
  console.error(`[duul] Warning: ${provider.name} provider does not support tool calling. ` +
217
289
  'Reviewer will not be able to explore the workspace. Consider providing more context via relevant_code/artifact_refs.');
218
290
  }
219
- if (!provider.capabilities.previousResponseId && options.previousReviewId) {
220
- console.error(`[duul] Warning: ${provider.name} provider does not support previous_response_id. ` +
291
+ const plan = continuityPlan(provider.capabilities, !!options.previousReviewId);
292
+ if (plan.shouldWarn) {
293
+ console.error(`[duul] Warning: ${provider.name} provider does not support conversation continuity. ` +
221
294
  'Reviewer context from previous rounds will not be available.');
222
295
  }
223
296
  const workspaceRoot = options.workspaceScope?.root;
224
- // Retrieve conversation history for providers that use simulated context
225
- // OpenAI uses native previous_response_id, so skip for it
297
+ // Retrieve conversation history for replay-based providers (Anthropic, and the
298
+ // OpenAI ChatGPT-login backend). Native-chaining providers (OpenAI api-key)
299
+ // pass previousReviewId straight through and don't need replay.
226
300
  let conversationHistory;
227
- if (options.previousReviewId && provider.capabilities.previousResponseId && provider.name !== 'openai') {
301
+ if (plan.shouldLoad) {
228
302
  conversationHistory = await getConversationHistory(options.previousReviewId, workspaceRoot);
229
303
  if (conversationHistory) {
230
304
  console.error(`[duul] Loaded conversation history for ${options.previousReviewId} (${conversationHistory.length} turns)`);
@@ -234,8 +308,8 @@ export async function callReview(options) {
234
308
  }
235
309
  }
236
310
  const result = await provider.review({ ...options, conversationHistory });
237
- // Store conversation turns for future rounds (non-OpenAI providers)
238
- if (result.conversationTurns?.length && provider.name !== 'openai') {
311
+ // Store conversation turns for future rounds (replay-based providers only)
312
+ if (result.conversationTurns?.length && provider.capabilities.conversationReplay) {
239
313
  await storeConversation(result.reviewId, result.conversationTurns, workspaceRoot);
240
314
  console.error(`[duul] Stored conversation (${result.conversationTurns.length} turns) for ${result.reviewId}`);
241
315
  }