npm - @planningo/duul - Versions diffs - 1.0.0 → 1.1.0 - Mend

@planningo/duul 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/README.ko.md +92 -6
package/README.md +94 -7
package/build/prompts/code-review-system.js +11 -1
package/build/prompts/plan-review-system.js +11 -1
package/build/schemas/code-review.d.ts +48 -11
package/build/schemas/code-review.js +22 -3
package/build/schemas/common.d.ts +26 -3
package/build/schemas/common.js +16 -2
package/build/schemas/execution-partition.d.ts +97 -63
package/build/schemas/execution-partition.js +13 -3
package/build/schemas/plan-review.d.ts +42 -8
package/build/schemas/plan-review.js +15 -1
package/build/services/filesystem-tools.d.ts +19 -1
package/build/services/filesystem-tools.js +50 -13
package/build/services/filesystem.d.ts +20 -0
package/build/services/filesystem.js +51 -17
package/build/services/providers/anthropic.js +5 -3
package/build/services/providers/codex-auth.d.ts +51 -0
package/build/services/providers/codex-auth.js +178 -0
package/build/services/providers/google.js +4 -2
package/build/services/providers/openai.d.ts +33 -0
package/build/services/providers/openai.js +173 -30
package/build/services/providers/types.d.ts +7 -1
package/build/services/review-limits.d.ts +8 -0
package/build/services/review-limits.js +21 -0
package/build/services/reviewer.d.ts +34 -2
package/build/services/reviewer.js +95 -21
package/build/tools/code-review.js +50 -7
package/build/tools/execution-partition.js +55 -10
package/build/tools/plan-review.js +38 -6
package/package.json +1 -1

package/build/services/providers/openai.js CHANGED Viewed

@@ -1,7 +1,9 @@
+import { randomUUID } from 'node:crypto';
 import OpenAI from 'openai';
 import { zodTextFormat } from 'openai/helpers/zod';
 import { validateProjectRoot } from '../filesystem.js';
-import { executeFilesystemTool } from '../filesystem-tools.js';
+import { CHATGPT_BASE_URL } from './codex-auth.js';
+import { executeFilesystemTool, createReviewerByteBudget } from '../filesystem-tools.js';
 import { estimateCost } from '../pricing.js';
 const MAX_INPUT_CHARS = 400_000;
 const MAX_TOOL_ROUNDS = 10;
@@ -166,31 +168,57 @@ function validateInputLength(systemPrompt, userMessage) {
 }
 export class OpenAIProvider {
     name = 'openai';
-    capabilities = {
-        structuredOutputs: true,
-        toolCalling: true,
-        previousResponseId: true,
-        jsonSchemaStrict: true,
-    };
+    capabilities;
     client;
     model;
     temperature;
     topP;
+    /**
+     * ChatGPT-backend mode. The endpoint is stateless (`store: false`): it does
+     * not support `previous_response_id`, `temperature`/`top_p`, or
+     * `max_output_tokens`, and it streams. We resend the full input each turn.
+     */
+    stateless;
+    baseURL;
+    defaultHeaders;
+    refresh;
+    reasoningEffort;
     constructor(config) {
-        const apiKey = config?.apiKey ?? process.env.OPENAI_API_KEY;
+        const chatgpt = config?.chatgpt;
+        this.stateless = !!chatgpt;
+        this.refresh = chatgpt?.refresh;
+        this.reasoningEffort = process.env.DUUL_REASONING_EFFORT ?? 'medium';
+        const apiKey = chatgpt?.accessToken ?? config?.apiKey ?? process.env.OPENAI_API_KEY;
         if (!apiKey) {
-            throw new Error('OPENAI_API_KEY environment variable is not set');
+            throw new Error('No OpenAI credential found. Set OPENAI_API_KEY, or sign in with the Codex CLI (`codex login`).');
         }
-        this.client = new OpenAI({
-            apiKey,
-            ...(config?.baseUrl ? { baseURL: config.baseUrl } : {}),
-        });
+        this.baseURL = chatgpt ? CHATGPT_BASE_URL : config?.baseUrl;
+        this.defaultHeaders = chatgpt
+            ? { 'chatgpt-account-id': chatgpt.accountId, originator: 'codex_cli_rs', 'session-id': randomUUID() }
+            : undefined;
+        this.client = this.buildClient(apiKey);
         this.model = config?.model ?? process.env.REVIEW_MODEL ?? 'gpt-5.4';
         this.temperature = config?.temperature ?? 0.2;
         this.topP = config?.topP ?? 0.1;
+        this.capabilities = {
+            structuredOutputs: true,
+            toolCalling: true,
+            // Native server-side chaining is available only in api-key mode. The
+            // ChatGPT backend is stateless, so continuity there comes from turn replay.
+            previousResponseId: !this.stateless,
+            conversationReplay: this.stateless,
+            jsonSchemaStrict: true,
+        };
+    }
+    buildClient(apiKey) {
+        return new OpenAI({
+            apiKey,
+            ...(this.baseURL ? { baseURL: this.baseURL } : {}),
+            ...(this.defaultHeaders ? { defaultHeaders: this.defaultHeaders } : {}),
+        });
     }
     async review(options) {
-        const { systemPrompt, userMessage, schemaName, outputSchema, workspaceScope, previousReviewId } = options;
+        const { systemPrompt, userMessage, schemaName, outputSchema, workspaceScope, previousReviewId, conversationHistory } = options;
         validateInputLength(systemPrompt, userMessage);
         const effectiveRoot = workspaceScope?.root ?? null;
         if (effectiveRoot && !workspaceScope) {
@@ -225,19 +253,49 @@ export class OpenAIProvider {
         const baseParams = {
             model: this.model,
             instructions: systemPrompt,
-            temperature: this.temperature,
-            top_p: this.topP,
-            max_output_tokens: 16384,
             text: { format: zodTextFormat(outputSchema, schemaName) },
             ...(tools ? { tools } : {}),
+            ...(this.stateless
+                ? {
+                    // ChatGPT backend: stateless, reasoning-only sampling, encrypted
+                    // reasoning must be echoed back on each turn (store: false).
+                    store: false,
+                    reasoning: { effort: this.reasoningEffort },
+                    include: ['reasoning.encrypted_content'],
+                }
+                : {
+                    temperature: this.temperature,
+                    top_p: this.topP,
+                    max_output_tokens: 16384,
+                }),
         };
-        let response = await this.apiCallWithRetry({
-            ...baseParams,
-            input: [{ role: 'user', content: [{ type: 'input_text', text: userMessage }] }],
-            ...(previousReviewId ? { previous_response_id: previousReviewId } : {}),
-        });
+        // Stateless (ChatGPT backend): accumulate the full input across tool rounds
+        // since there is no server-side `previous_response_id` chaining. Prior rounds
+        // are replayed as message items (user: input_text, assistant: output_text).
+        const inputItems = [];
+        if (this.stateless && conversationHistory?.length) {
+            inputItems.push(...conversationHistory);
+        }
+        inputItems.push({ role: 'user', content: [{ type: 'input_text', text: userMessage }] });
+        let response = this.stateless
+            ? await this.apiCallWithRetry({ ...baseParams, input: inputItems })
+            : await this.apiCallWithRetry({
+                ...baseParams,
+                input: inputItems,
+                ...(previousReviewId ? { previous_response_id: previousReviewId } : {}),
+            });
         accumulateUsage(response);
         console.error(`[duul] response.id=${response.id} model=${this.model} provider=openai`);
+        // Continue the conversation after a tool round. Stateless mode resends the
+        // whole input (prior assistant output items + the new tool outputs); chained
+        // mode uses server-side previous_response_id and sends only the new items.
+        const continueConversation = async (newItems) => {
+            if (this.stateless) {
+                inputItems.push(...response.output, ...newItems);
+                return this.apiCallWithRetry({ ...baseParams, input: inputItems });
+            }
+            return this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: newItems });
+        };
         // Agentic tool-calling loop
         if (effectiveRoot) {
             const toolReadBudget = MAX_INPUT_CHARS - (systemPrompt.length + userMessage.length);
@@ -274,6 +332,7 @@ export class OpenAIProvider {
             };
             const toolCache = new Map();
             const callCounts = new Map();
+            const byteBudget = createReviewerByteBudget();
             for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
                 const functionCalls = this.getFunctionCalls(response);
                 if (functionCalls.length === 0)
@@ -302,14 +361,14 @@ export class OpenAIProvider {
                         toolResults.push({ type: 'function_call_output', call_id: call.call_id, output: budgetMessage(call.name, currentLevel) });
                         continue;
                     }
-                    const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope);
+                    const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope, byteBudget);
                     toolCache.set(cacheKey, result);
                     allUsedTools.push(`${call.name}(${argSummary})`);
                     accumulatedToolChars += result.length;
                     console.error(`[duul]   ${call.name}(${argSummary}) -> ${result.length} chars (total: ${accumulatedToolChars}/${toolReadBudget}, level ${getStrategyLevel()})`);
                     toolResults.push({ type: 'function_call_output', call_id: call.call_id, output: result });
                 }
-                response = await this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: toolResults });
+                response = await continueConversation(toolResults);
                 accumulateUsage(response);
                 console.error(`[duul] response.id=${response.id} (after tool round ${round + 1})`);
                 if (getStrategyLevel() >= 3 && this.hasPendingFunctionCalls(response)) {
@@ -317,7 +376,7 @@ export class OpenAIProvider {
                         type: 'function_call_output', call_id: c.call_id,
                         output: 'No more file reads allowed. You must produce your final review verdict now.',
                     }));
-                    response = await this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: stopResults });
+                    response = await continueConversation(stopResults);
                     accumulateUsage(response);
                     break;
                 }
@@ -329,7 +388,7 @@ export class OpenAIProvider {
                     type: 'function_call_output', call_id: c.call_id,
                     output: 'Tool call limit reached. You must produce your final review verdict now.',
                 }));
-                response = await this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: stopResults });
+                response = await continueConversation(stopResults);
                 accumulateUsage(response);
             }
         }
@@ -337,31 +396,68 @@ export class OpenAIProvider {
         const costStr = usage.estimated_cost_usd !== null ? ` (~$${usage.estimated_cost_usd.toFixed(4)})` : '';
         const cachedStr = usage.cached_input_tokens ? ` [cached: ${usage.cached_input_tokens}]` : '';
         console.error(`[duul] Token usage: ${usage.input_tokens} in + ${usage.output_tokens} out = ${usage.total_tokens} total (${usage.api_calls} API calls)${cachedStr}${costStr}`);
+        // Stateless mode: record this round's user/assistant turns so the reviewer
+        // can replay them next round (the ChatGPT backend has no native chaining).
+        // Only the final Q&A is kept — replaying every tool call would bloat tokens
+        // and risks stale encrypted-reasoning items across separate responses.
+        const buildTurns = (assistantText) => this.stateless
+            ? [
+                ...(conversationHistory ?? []),
+                { role: 'user', content: [{ type: 'input_text', text: userMessage }] },
+                { role: 'assistant', content: [{ type: 'output_text', text: assistantText }] },
+            ]
+            : undefined;
         // Extract structured output
+        const outputText = this.getOutputText(response);
         const parsed = this.extractStructuredOutput(response, outputSchema);
         if (parsed !== null) {
-            return { parsed, reviewId: response.id, usage };
+            return { parsed, reviewId: response.id, usage, conversationTurns: buildTurns(outputText ?? '') };
         }
         if (options.createFallback) {
             const reason = this.hasPendingFunctionCalls(response) ? 'round_limit' : 'budget';
             const fallback = options.createFallback(reason, allUsedTools);
             console.error(`[duul] Returning structured fallback (reason: ${reason}).`);
-            return { parsed: fallback, reviewId: response.id, usage };
+            return { parsed: fallback, reviewId: response.id, usage, conversationTurns: buildTurns(outputText ?? JSON.stringify(fallback)) };
         }
         throw new Error('Review failed: could not obtain structured verdict after tool loop.');
     }
     async apiCallWithRetry(params) {
+        let refreshedOnce = false;
         for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
             const controller = new AbortController();
             const timeout = setTimeout(() => controller.abort(), 120_000);
             try {
-                const response = await this.client.responses.create({ ...params, stream: false }, { signal: controller.signal });
+                let response;
+                if (this.stateless) {
+                    // ChatGPT backend requires streaming and leaves `response.completed`'s
+                    // `output` empty — aggregate items from the streamed events instead.
+                    const stream = this.client.responses.stream(params, { signal: controller.signal });
+                    response = await this.aggregateStream(stream);
+                }
+                else {
+                    response = (await this.client.responses.create({ ...params, stream: false }, { signal: controller.signal }));
+                }
                 clearTimeout(timeout);
                 return response;
             }
             catch (error) {
                 clearTimeout(timeout);
-                const isRetryable = error instanceof Error && ('status' in error ? (error.status === 429 || error.status >= 500) : error.name === 'AbortError');
+                const status = error instanceof Error && 'status' in error ? error.status : undefined;
+                // ChatGPT token expired mid-review: refresh once and retry immediately.
+                if (status === 401 && this.refresh && !refreshedOnce) {
+                    refreshedOnce = true;
+                    try {
+                        const token = await this.refresh();
+                        this.client = this.buildClient(token);
+                        console.error('[duul] Refreshed Codex token after 401, retrying');
+                        attempt--; // don't consume a retry for the refresh
+                        continue;
+                    }
+                    catch (refreshError) {
+                        console.error(`[duul] Codex token refresh failed: ${refreshError instanceof Error ? refreshError.message : refreshError}`);
+                    }
+                }
+                const isRetryable = error instanceof Error && (status !== undefined ? (status === 429 || status >= 500) : error.name === 'AbortError');
                 if (isRetryable && attempt < MAX_RETRIES - 1) {
                     const delay = 1000 * Math.pow(2, attempt);
                     console.error(`[duul] Retry ${attempt + 1}/${MAX_RETRIES} after ${delay}ms`);
@@ -373,6 +469,53 @@ export class OpenAIProvider {
         }
         throw new Error('Unreachable: exhausted retries');
     }
+    /**
+     * Aggregate a streamed Responses call into a Response object.
+     *
+     * The ChatGPT backend delivers completed output items via
+     * `response.output_item.done` events and returns an EMPTY `output` array on
+     * `response.completed`, so we collect items from the stream ourselves. Usage
+     * and id come from `response.completed` (falling back to `response.created`).
+     */
+    async aggregateStream(stream) {
+        const output = [];
+        let id = '';
+        let usage;
+        for await (const event of stream) {
+            switch (event.type) {
+                case 'response.created':
+                    id = event.response.id;
+                    break;
+                case 'response.output_item.done':
+                    output.push(event.item);
+                    break;
+                case 'response.completed':
+                    id = event.response.id ?? id;
+                    usage = event.response.usage;
+                    break;
+                case 'response.failed':
+                    throw new Error(`ChatGPT backend response failed: ${event.response.error?.message ?? 'unknown error'}`);
+                case 'error':
+                    throw new Error(`ChatGPT backend stream error: ${event.message ?? 'unknown error'}`);
+                default:
+                    break;
+            }
+        }
+        return { id, output, usage };
+    }
+    /** Return the first output_text string in the response, or null. */
+    getOutputText(response) {
+        for (const item of response.output) {
+            if (item.type === 'message' && 'content' in item) {
+                const msg = item;
+                for (const content of msg.content) {
+                    if (content.type === 'output_text' && content.text)
+                        return content.text;
+                }
+            }
+        }
+        return null;
+    }
     extractStructuredOutput(response, outputSchema) {
         for (const item of response.output) {
             if (item.type === 'message' && 'content' in item) {

package/build/services/providers/types.d.ts CHANGED Viewed

@@ -56,8 +56,14 @@ export interface ProviderCapabilities {
     structuredOutputs: boolean;
     /** Supports tool/function calling */
     toolCalling: boolean;
-    /** Supports previous_response_id for conversation continuity */
+    /** Supports NATIVE server-side conversation chaining via previous_response_id */
     previousResponseId: boolean;
+    /**
+     * Continuity is achieved by replaying prior turns (conversationHistory) rather
+     * than native server-side chaining. When true, the reviewer stores/loads
+     * conversation turns per reviewId and passes them back on the next round.
+     */
+    conversationReplay: boolean;
     /** Supports strict JSON schema mode */
     jsonSchemaStrict: boolean;
 }

package/build/services/review-limits.d.ts CHANGED Viewed

@@ -34,3 +34,11 @@ export declare function computeIterationMeta(phase: ReviewPhase, callerIteration
  * (last allowed iteration).
  */
 export declare function isIterationLimitExceeded(phase: ReviewPhase, callerIterationCount?: number, requestMaxOverride?: number): boolean;
+/**
+ * Emit a soft cost warning once iteration_count crosses ~60% of the limit.
+ * Uses the current round's estimated cost as a rough per-round figure so the
+ * orchestrator can decide whether to accept a near-verdict or escalate.
+ *
+ * Returns null when below the threshold, or when iteration_count is 0.
+ */
+export declare function computeCostWarning(iterMeta: IterationMeta, estimatedCostUsd: number | null): string | null;

package/build/services/review-limits.js CHANGED Viewed

@@ -63,3 +63,24 @@ export function isIterationLimitExceeded(phase, callerIterationCount, requestMax
     const limit = getIterationLimit(phase, requestMaxOverride);
     return callerIterationCount > limit;
 }
+const COST_WARNING_RATIO = 0.6;
+/**
+ * Emit a soft cost warning once iteration_count crosses ~60% of the limit.
+ * Uses the current round's estimated cost as a rough per-round figure so the
+ * orchestrator can decide whether to accept a near-verdict or escalate.
+ *
+ * Returns null when below the threshold, or when iteration_count is 0.
+ */
+export function computeCostWarning(iterMeta, estimatedCostUsd) {
+    if (iterMeta.iteration_count <= 0)
+        return null;
+    const trigger = Math.ceil(iterMeta.iteration_limit * COST_WARNING_RATIO);
+    if (iterMeta.iteration_count < trigger)
+        return null;
+    const costStr = estimatedCostUsd !== null && estimatedCostUsd > 0
+        ? `~$${estimatedCostUsd.toFixed(4)}`
+        : 'an unknown amount';
+    return (`This is iteration ${iterMeta.iteration_count} of ${iterMeta.iteration_limit}. ` +
+        `Each round costs ${costStr}. ` +
+        `Consider accepting REVISE-with-minor-issues or escalating to human.`);
+}

package/build/services/reviewer.d.ts CHANGED Viewed

@@ -4,8 +4,14 @@
  */
 import type { z } from 'zod';
 import type { WorkspaceScope } from './filesystem.js';
-import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage } from './providers/types.js';
+import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage, ConversationTurn } from './providers/types.js';
 export type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage };
+export type ReviewToolName = 'plan' | 'code' | 'partition';
+type ReviewerModel = string | {
+    plan?: string;
+    code?: string;
+    partition?: string;
+};
 export interface ReviewOptions<T extends z.ZodType> {
     systemPrompt: string;
     userMessage: string;
@@ -13,9 +19,10 @@ export interface ReviewOptions<T extends z.ZodType> {
     outputSchema: T;
     workspaceScope?: WorkspaceScope | null;
     previousReviewId?: string;
+    toolName?: ReviewToolName;
     reviewerConfig?: {
         provider?: string;
-        model?: string;
+        model?: ReviewerModel;
         base_url?: string;
         api_key?: string;
         temperature?: number;
@@ -23,6 +30,31 @@ export interface ReviewOptions<T extends z.ZodType> {
     };
     createFallback?: (reason: ExhaustionReason, usedTools: string[]) => z.infer<T>;
 }
+/**
+ * Resolve a concrete model string from either the flat string form or
+ * the per-tool object form. Returns undefined when nothing is set so the
+ * provider falls back to env/default.
+ */
+export declare function resolveModelForTool(model: ReviewerModel | undefined, toolName: ReviewToolName | undefined): string | undefined;
+/**
+ * Decide how to handle cross-round continuity for a provider, given whether the
+ * caller supplied a previousReviewId. Pure function so it can be unit-tested.
+ *
+ * - `shouldLoad`: replay-based providers need prior turns loaded and passed in.
+ * - `shouldWarn`: the caller asked for continuity but the provider supports
+ *   neither native chaining nor replay, so context will be lost.
+ */
+export declare function continuityPlan(capabilities: {
+    previousResponseId: boolean;
+    conversationReplay: boolean;
+}, hasPreviousReviewId: boolean): {
+    shouldLoad: boolean;
+    shouldWarn: boolean;
+};
+/** Reset the in-memory conversation store. Test-only. */
+export declare function __resetConversationStoreForTest(): void;
+export declare function getConversationHistory(reviewId: string, workspaceRoot?: string): Promise<ConversationTurn[] | undefined>;
+export declare function storeConversation(reviewId: string, turns: ConversationTurn[], workspaceRoot?: string): Promise<void>;
 /**
  * Main entry point for all review calls.
  * Resolves provider from config, delegates the call.

package/build/services/reviewer.js CHANGED Viewed

@@ -3,6 +3,21 @@ import { join, dirname } from 'node:path';
 import { OpenAIProvider } from './providers/openai.js';
 import { AnthropicProvider } from './providers/anthropic.js';
 import { GoogleProvider } from './providers/google.js';
+import { resolveCodexCredential } from './providers/codex-auth.js';
+/**
+ * Resolve a concrete model string from either the flat string form or
+ * the per-tool object form. Returns undefined when nothing is set so the
+ * provider falls back to env/default.
+ */
+export function resolveModelForTool(model, toolName) {
+    if (model === undefined)
+        return undefined;
+    if (typeof model === 'string')
+        return model;
+    if (!toolName)
+        return undefined;
+    return model[toolName];
+}
 /**
  * Resolve the effective provider name from config and env vars.
  * Priority: per-request config > env REVIEW_PROVIDER > "openai"
@@ -52,26 +67,50 @@ function apiKeyFingerprint(key) {
         return key;
     return `${key.slice(0, 4)}...${key.slice(-4)}`;
 }
-function getProviderCacheKey(provider, config) {
+function getProviderCacheKey(provider, resolvedModel, config) {
     const apiKey = config?.api_key ?? resolveApiKey(provider);
     return JSON.stringify({
         provider,
-        model: config?.model,
+        model: resolvedModel,
         base_url: config?.base_url,
         temperature: config?.temperature,
         top_p: config?.top_p,
         key_fp: apiKeyFingerprint(apiKey),
     });
 }
+/**
+ * Resolve the OpenAI credential, falling back to the Codex CLI login when no
+ * explicit or env API key is present. Returns either an API key or a ChatGPT
+ * bearer credential (Sign in with ChatGPT).
+ */
+async function resolveOpenAiCredential(configApiKey) {
+    const explicitKey = configApiKey ?? process.env.OPENAI_API_KEY;
+    if (explicitKey)
+        return { apiKey: explicitKey };
+    const cred = await resolveCodexCredential();
+    if (!cred)
+        return {}; // let the provider throw its standard "no credential" error
+    if (cred.mode === 'apikey') {
+        console.error('[duul] Using OpenAI API key from Codex CLI login (~/.codex/auth.json)');
+        return { apiKey: cred.apiKey };
+    }
+    console.error('[duul] Using Sign in with ChatGPT credentials from Codex CLI login');
+    return { chatgpt: { accessToken: cred.accessToken, accountId: cred.accountId, refresh: cred.refresh } };
+}
 /**
  * Create or retrieve a cached provider instance.
+ *
+ * `toolName` lets callers use the per-tool model override form:
+ * `{ plan: "...", code: "...", partition: "..." }`. The resolved model
+ * participates in the cache key so per-tool models don't collide.
  */
-function getProvider(reviewerConfig) {
+async function getProvider(reviewerConfig, toolName) {
     const providerName = resolveProviderName(reviewerConfig?.provider);
     const hasEphemeralKey = !!reviewerConfig?.api_key;
+    const resolvedModel = resolveModelForTool(reviewerConfig?.model, toolName);
     // Per-request api_key → skip cache (ephemeral credential, don't leak into shared cache)
     if (!hasEphemeralKey) {
-        const cacheKey = getProviderCacheKey(providerName, reviewerConfig);
+        const cacheKey = getProviderCacheKey(providerName, resolvedModel, reviewerConfig);
         if (providerCache.has(cacheKey)) {
             return providerCache.get(cacheKey);
         }
@@ -80,15 +119,21 @@ function getProvider(reviewerConfig) {
     const constructorConfig = {
         apiKey,
         baseUrl: reviewerConfig?.base_url,
-        model: reviewerConfig?.model,
+        model: resolvedModel,
         temperature: reviewerConfig?.temperature,
         topP: reviewerConfig?.top_p,
     };
     let provider;
+    // ChatGPT-login providers hold a rotating bearer token — never cache them.
+    let bypassCache = hasEphemeralKey;
     switch (providerName) {
-        case 'openai':
-            provider = new OpenAIProvider(constructorConfig);
+        case 'openai': {
+            const cred = await resolveOpenAiCredential(reviewerConfig?.api_key);
+            if (cred.chatgpt)
+                bypassCache = true;
+            provider = new OpenAIProvider({ ...constructorConfig, apiKey: cred.apiKey ?? apiKey, chatgpt: cred.chatgpt });
             break;
+        }
         case 'anthropic':
             provider = new AnthropicProvider(constructorConfig);
             break;
@@ -110,18 +155,18 @@ function getProvider(reviewerConfig) {
         default:
             throw new Error(`Unknown provider: ${providerName}`);
     }
-    // Only cache env-based providers (not ephemeral per-request keys)
-    if (!hasEphemeralKey) {
+    // Only cache stable env-based providers (not ephemeral keys or rotating tokens)
+    if (!bypassCache) {
         // Evict oldest entry if cache is full
         if (providerCache.size >= MAX_CACHE_SIZE) {
             const oldestKey = providerCache.keys().next().value;
             providerCache.delete(oldestKey);
             console.error(`[duul] Provider cache full, evicted oldest entry`);
         }
-        const cacheKey = getProviderCacheKey(providerName, reviewerConfig);
+        const cacheKey = getProviderCacheKey(providerName, resolvedModel, reviewerConfig);
         providerCache.set(cacheKey, provider);
     }
-    console.error(`[duul] Created ${providerName} provider (model: ${reviewerConfig?.model ?? 'default'}${hasEphemeralKey ? ', ephemeral key' : ''})`);
+    console.error(`[duul] Created ${providerName} provider (model: ${resolvedModel ?? 'default'}${toolName ? `, tool: ${toolName}` : ''}${bypassCache ? ', uncached' : ''})`);
     return provider;
 }
 // --- Conversation history store (disk-persisted per workspace) ---
@@ -142,6 +187,11 @@ function conversationsPath(workspaceRoot) {
 async function loadFromDisk(workspaceRoot) {
     if (diskLoaded && lastWorkspaceRoot === workspaceRoot)
         return;
+    // Switching workspaces: drop the previous workspace's entries so they aren't
+    // flushed into (or replayed from) the new workspace's conversations file.
+    if (lastWorkspaceRoot !== null && lastWorkspaceRoot !== workspaceRoot) {
+        memoryCache.clear();
+    }
     lastWorkspaceRoot = workspaceRoot;
     diskLoaded = true;
     try {
@@ -189,7 +239,29 @@ function evictOldest() {
         console.error(`[duul] Conversation store full, evicted oldest entry`);
     }
 }
-async function getConversationHistory(reviewId, workspaceRoot) {
+/**
+ * Decide how to handle cross-round continuity for a provider, given whether the
+ * caller supplied a previousReviewId. Pure function so it can be unit-tested.
+ *
+ * - `shouldLoad`: replay-based providers need prior turns loaded and passed in.
+ * - `shouldWarn`: the caller asked for continuity but the provider supports
+ *   neither native chaining nor replay, so context will be lost.
+ */
+export function continuityPlan(capabilities, hasPreviousReviewId) {
+    if (!hasPreviousReviewId)
+        return { shouldLoad: false, shouldWarn: false };
+    return {
+        shouldLoad: capabilities.conversationReplay,
+        shouldWarn: !capabilities.previousResponseId && !capabilities.conversationReplay,
+    };
+}
+/** Reset the in-memory conversation store. Test-only. */
+export function __resetConversationStoreForTest() {
+    memoryCache.clear();
+    diskLoaded = false;
+    lastWorkspaceRoot = null;
+}
+export async function getConversationHistory(reviewId, workspaceRoot) {
     if (workspaceRoot)
         await loadFromDisk(workspaceRoot);
     const entry = memoryCache.get(reviewId);
@@ -198,7 +270,7 @@ async function getConversationHistory(reviewId, workspaceRoot) {
     entry.lastAccessed = Date.now();
     return entry.turns;
 }
-async function storeConversation(reviewId, turns, workspaceRoot) {
+export async function storeConversation(reviewId, turns, workspaceRoot) {
     evictOldest();
     memoryCache.set(reviewId, { turns, lastAccessed: Date.now() });
     if (workspaceRoot) {
@@ -210,21 +282,23 @@ async function storeConversation(reviewId, turns, workspaceRoot) {
  * Resolves provider from config, delegates the call.
  */
 export async function callReview(options) {
-    const provider = getProvider(options.reviewerConfig);
+    const provider = await getProvider(options.reviewerConfig, options.toolName);
     // Log capability warnings for non-full-featured providers
     if (!provider.capabilities.toolCalling && options.workspaceScope?.root) {
         console.error(`[duul] Warning: ${provider.name} provider does not support tool calling. ` +
             'Reviewer will not be able to explore the workspace. Consider providing more context via relevant_code/artifact_refs.');
     }
-    if (!provider.capabilities.previousResponseId && options.previousReviewId) {
-        console.error(`[duul] Warning: ${provider.name} provider does not support previous_response_id. ` +
+    const plan = continuityPlan(provider.capabilities, !!options.previousReviewId);
+    if (plan.shouldWarn) {
+        console.error(`[duul] Warning: ${provider.name} provider does not support conversation continuity. ` +
             'Reviewer context from previous rounds will not be available.');
     }
     const workspaceRoot = options.workspaceScope?.root;
-    // Retrieve conversation history for providers that use simulated context
-    // OpenAI uses native previous_response_id, so skip for it
+    // Retrieve conversation history for replay-based providers (Anthropic, and the
+    // OpenAI ChatGPT-login backend). Native-chaining providers (OpenAI api-key)
+    // pass previousReviewId straight through and don't need replay.
     let conversationHistory;
-    if (options.previousReviewId && provider.capabilities.previousResponseId && provider.name !== 'openai') {
+    if (plan.shouldLoad) {
         conversationHistory = await getConversationHistory(options.previousReviewId, workspaceRoot);
         if (conversationHistory) {
             console.error(`[duul] Loaded conversation history for ${options.previousReviewId} (${conversationHistory.length} turns)`);
@@ -234,8 +308,8 @@ export async function callReview(options) {
         }
     }
     const result = await provider.review({ ...options, conversationHistory });
-    // Store conversation turns for future rounds (non-OpenAI providers)
-    if (result.conversationTurns?.length && provider.name !== 'openai') {
+    // Store conversation turns for future rounds (replay-based providers only)
+    if (result.conversationTurns?.length && provider.capabilities.conversationReplay) {
         await storeConversation(result.reviewId, result.conversationTurns, workspaceRoot);
         console.error(`[duul] Stored conversation (${result.conversationTurns.length} turns) for ${result.reviewId}`);
     }