@planningo/duul 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ko.md +92 -6
- package/README.md +94 -7
- package/build/prompts/code-review-system.js +11 -1
- package/build/prompts/plan-review-system.js +11 -1
- package/build/schemas/code-review.d.ts +48 -11
- package/build/schemas/code-review.js +22 -3
- package/build/schemas/common.d.ts +26 -3
- package/build/schemas/common.js +16 -2
- package/build/schemas/execution-partition.d.ts +97 -63
- package/build/schemas/execution-partition.js +13 -3
- package/build/schemas/plan-review.d.ts +42 -8
- package/build/schemas/plan-review.js +15 -1
- package/build/services/filesystem-tools.d.ts +19 -1
- package/build/services/filesystem-tools.js +50 -13
- package/build/services/filesystem.d.ts +20 -0
- package/build/services/filesystem.js +51 -17
- package/build/services/providers/anthropic.js +5 -3
- package/build/services/providers/codex-auth.d.ts +51 -0
- package/build/services/providers/codex-auth.js +178 -0
- package/build/services/providers/google.js +4 -2
- package/build/services/providers/openai.d.ts +33 -0
- package/build/services/providers/openai.js +173 -30
- package/build/services/providers/types.d.ts +7 -1
- package/build/services/review-limits.d.ts +8 -0
- package/build/services/review-limits.js +21 -0
- package/build/services/reviewer.d.ts +34 -2
- package/build/services/reviewer.js +95 -21
- package/build/tools/code-review.js +50 -7
- package/build/tools/execution-partition.js +55 -10
- package/build/tools/plan-review.js +38 -6
- package/package.json +1 -1
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
1
2
|
import OpenAI from 'openai';
|
|
2
3
|
import { zodTextFormat } from 'openai/helpers/zod';
|
|
3
4
|
import { validateProjectRoot } from '../filesystem.js';
|
|
4
|
-
import {
|
|
5
|
+
import { CHATGPT_BASE_URL } from './codex-auth.js';
|
|
6
|
+
import { executeFilesystemTool, createReviewerByteBudget } from '../filesystem-tools.js';
|
|
5
7
|
import { estimateCost } from '../pricing.js';
|
|
6
8
|
const MAX_INPUT_CHARS = 400_000;
|
|
7
9
|
const MAX_TOOL_ROUNDS = 10;
|
|
@@ -166,31 +168,57 @@ function validateInputLength(systemPrompt, userMessage) {
|
|
|
166
168
|
}
|
|
167
169
|
export class OpenAIProvider {
|
|
168
170
|
name = 'openai';
|
|
169
|
-
capabilities
|
|
170
|
-
structuredOutputs: true,
|
|
171
|
-
toolCalling: true,
|
|
172
|
-
previousResponseId: true,
|
|
173
|
-
jsonSchemaStrict: true,
|
|
174
|
-
};
|
|
171
|
+
capabilities;
|
|
175
172
|
client;
|
|
176
173
|
model;
|
|
177
174
|
temperature;
|
|
178
175
|
topP;
|
|
176
|
+
/**
|
|
177
|
+
* ChatGPT-backend mode. The endpoint is stateless (`store: false`): it does
|
|
178
|
+
* not support `previous_response_id`, `temperature`/`top_p`, or
|
|
179
|
+
* `max_output_tokens`, and it streams. We resend the full input each turn.
|
|
180
|
+
*/
|
|
181
|
+
stateless;
|
|
182
|
+
baseURL;
|
|
183
|
+
defaultHeaders;
|
|
184
|
+
refresh;
|
|
185
|
+
reasoningEffort;
|
|
179
186
|
constructor(config) {
|
|
180
|
-
const
|
|
187
|
+
const chatgpt = config?.chatgpt;
|
|
188
|
+
this.stateless = !!chatgpt;
|
|
189
|
+
this.refresh = chatgpt?.refresh;
|
|
190
|
+
this.reasoningEffort = process.env.DUUL_REASONING_EFFORT ?? 'medium';
|
|
191
|
+
const apiKey = chatgpt?.accessToken ?? config?.apiKey ?? process.env.OPENAI_API_KEY;
|
|
181
192
|
if (!apiKey) {
|
|
182
|
-
throw new Error('OPENAI_API_KEY
|
|
193
|
+
throw new Error('No OpenAI credential found. Set OPENAI_API_KEY, or sign in with the Codex CLI (`codex login`).');
|
|
183
194
|
}
|
|
184
|
-
this.
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
195
|
+
this.baseURL = chatgpt ? CHATGPT_BASE_URL : config?.baseUrl;
|
|
196
|
+
this.defaultHeaders = chatgpt
|
|
197
|
+
? { 'chatgpt-account-id': chatgpt.accountId, originator: 'codex_cli_rs', 'session-id': randomUUID() }
|
|
198
|
+
: undefined;
|
|
199
|
+
this.client = this.buildClient(apiKey);
|
|
188
200
|
this.model = config?.model ?? process.env.REVIEW_MODEL ?? 'gpt-5.4';
|
|
189
201
|
this.temperature = config?.temperature ?? 0.2;
|
|
190
202
|
this.topP = config?.topP ?? 0.1;
|
|
203
|
+
this.capabilities = {
|
|
204
|
+
structuredOutputs: true,
|
|
205
|
+
toolCalling: true,
|
|
206
|
+
// Native server-side chaining is available only in api-key mode. The
|
|
207
|
+
// ChatGPT backend is stateless, so continuity there comes from turn replay.
|
|
208
|
+
previousResponseId: !this.stateless,
|
|
209
|
+
conversationReplay: this.stateless,
|
|
210
|
+
jsonSchemaStrict: true,
|
|
211
|
+
};
|
|
212
|
+
}
|
|
213
|
+
buildClient(apiKey) {
|
|
214
|
+
return new OpenAI({
|
|
215
|
+
apiKey,
|
|
216
|
+
...(this.baseURL ? { baseURL: this.baseURL } : {}),
|
|
217
|
+
...(this.defaultHeaders ? { defaultHeaders: this.defaultHeaders } : {}),
|
|
218
|
+
});
|
|
191
219
|
}
|
|
192
220
|
async review(options) {
|
|
193
|
-
const { systemPrompt, userMessage, schemaName, outputSchema, workspaceScope, previousReviewId } = options;
|
|
221
|
+
const { systemPrompt, userMessage, schemaName, outputSchema, workspaceScope, previousReviewId, conversationHistory } = options;
|
|
194
222
|
validateInputLength(systemPrompt, userMessage);
|
|
195
223
|
const effectiveRoot = workspaceScope?.root ?? null;
|
|
196
224
|
if (effectiveRoot && !workspaceScope) {
|
|
@@ -225,19 +253,49 @@ export class OpenAIProvider {
|
|
|
225
253
|
const baseParams = {
|
|
226
254
|
model: this.model,
|
|
227
255
|
instructions: systemPrompt,
|
|
228
|
-
temperature: this.temperature,
|
|
229
|
-
top_p: this.topP,
|
|
230
|
-
max_output_tokens: 16384,
|
|
231
256
|
text: { format: zodTextFormat(outputSchema, schemaName) },
|
|
232
257
|
...(tools ? { tools } : {}),
|
|
258
|
+
...(this.stateless
|
|
259
|
+
? {
|
|
260
|
+
// ChatGPT backend: stateless, reasoning-only sampling, encrypted
|
|
261
|
+
// reasoning must be echoed back on each turn (store: false).
|
|
262
|
+
store: false,
|
|
263
|
+
reasoning: { effort: this.reasoningEffort },
|
|
264
|
+
include: ['reasoning.encrypted_content'],
|
|
265
|
+
}
|
|
266
|
+
: {
|
|
267
|
+
temperature: this.temperature,
|
|
268
|
+
top_p: this.topP,
|
|
269
|
+
max_output_tokens: 16384,
|
|
270
|
+
}),
|
|
233
271
|
};
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
272
|
+
// Stateless (ChatGPT backend): accumulate the full input across tool rounds
|
|
273
|
+
// since there is no server-side `previous_response_id` chaining. Prior rounds
|
|
274
|
+
// are replayed as message items (user: input_text, assistant: output_text).
|
|
275
|
+
const inputItems = [];
|
|
276
|
+
if (this.stateless && conversationHistory?.length) {
|
|
277
|
+
inputItems.push(...conversationHistory);
|
|
278
|
+
}
|
|
279
|
+
inputItems.push({ role: 'user', content: [{ type: 'input_text', text: userMessage }] });
|
|
280
|
+
let response = this.stateless
|
|
281
|
+
? await this.apiCallWithRetry({ ...baseParams, input: inputItems })
|
|
282
|
+
: await this.apiCallWithRetry({
|
|
283
|
+
...baseParams,
|
|
284
|
+
input: inputItems,
|
|
285
|
+
...(previousReviewId ? { previous_response_id: previousReviewId } : {}),
|
|
286
|
+
});
|
|
239
287
|
accumulateUsage(response);
|
|
240
288
|
console.error(`[duul] response.id=${response.id} model=${this.model} provider=openai`);
|
|
289
|
+
// Continue the conversation after a tool round. Stateless mode resends the
|
|
290
|
+
// whole input (prior assistant output items + the new tool outputs); chained
|
|
291
|
+
// mode uses server-side previous_response_id and sends only the new items.
|
|
292
|
+
const continueConversation = async (newItems) => {
|
|
293
|
+
if (this.stateless) {
|
|
294
|
+
inputItems.push(...response.output, ...newItems);
|
|
295
|
+
return this.apiCallWithRetry({ ...baseParams, input: inputItems });
|
|
296
|
+
}
|
|
297
|
+
return this.apiCallWithRetry({ ...baseParams, previous_response_id: response.id, input: newItems });
|
|
298
|
+
};
|
|
241
299
|
// Agentic tool-calling loop
|
|
242
300
|
if (effectiveRoot) {
|
|
243
301
|
const toolReadBudget = MAX_INPUT_CHARS - (systemPrompt.length + userMessage.length);
|
|
@@ -274,6 +332,7 @@ export class OpenAIProvider {
|
|
|
274
332
|
};
|
|
275
333
|
const toolCache = new Map();
|
|
276
334
|
const callCounts = new Map();
|
|
335
|
+
const byteBudget = createReviewerByteBudget();
|
|
277
336
|
for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
|
|
278
337
|
const functionCalls = this.getFunctionCalls(response);
|
|
279
338
|
if (functionCalls.length === 0)
|
|
@@ -302,14 +361,14 @@ export class OpenAIProvider {
|
|
|
302
361
|
toolResults.push({ type: 'function_call_output', call_id: call.call_id, output: budgetMessage(call.name, currentLevel) });
|
|
303
362
|
continue;
|
|
304
363
|
}
|
|
305
|
-
const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope);
|
|
364
|
+
const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope, byteBudget);
|
|
306
365
|
toolCache.set(cacheKey, result);
|
|
307
366
|
allUsedTools.push(`${call.name}(${argSummary})`);
|
|
308
367
|
accumulatedToolChars += result.length;
|
|
309
368
|
console.error(`[duul] ${call.name}(${argSummary}) -> ${result.length} chars (total: ${accumulatedToolChars}/${toolReadBudget}, level ${getStrategyLevel()})`);
|
|
310
369
|
toolResults.push({ type: 'function_call_output', call_id: call.call_id, output: result });
|
|
311
370
|
}
|
|
312
|
-
response = await
|
|
371
|
+
response = await continueConversation(toolResults);
|
|
313
372
|
accumulateUsage(response);
|
|
314
373
|
console.error(`[duul] response.id=${response.id} (after tool round ${round + 1})`);
|
|
315
374
|
if (getStrategyLevel() >= 3 && this.hasPendingFunctionCalls(response)) {
|
|
@@ -317,7 +376,7 @@ export class OpenAIProvider {
|
|
|
317
376
|
type: 'function_call_output', call_id: c.call_id,
|
|
318
377
|
output: 'No more file reads allowed. You must produce your final review verdict now.',
|
|
319
378
|
}));
|
|
320
|
-
response = await
|
|
379
|
+
response = await continueConversation(stopResults);
|
|
321
380
|
accumulateUsage(response);
|
|
322
381
|
break;
|
|
323
382
|
}
|
|
@@ -329,7 +388,7 @@ export class OpenAIProvider {
|
|
|
329
388
|
type: 'function_call_output', call_id: c.call_id,
|
|
330
389
|
output: 'Tool call limit reached. You must produce your final review verdict now.',
|
|
331
390
|
}));
|
|
332
|
-
response = await
|
|
391
|
+
response = await continueConversation(stopResults);
|
|
333
392
|
accumulateUsage(response);
|
|
334
393
|
}
|
|
335
394
|
}
|
|
@@ -337,31 +396,68 @@ export class OpenAIProvider {
|
|
|
337
396
|
const costStr = usage.estimated_cost_usd !== null ? ` (~$${usage.estimated_cost_usd.toFixed(4)})` : '';
|
|
338
397
|
const cachedStr = usage.cached_input_tokens ? ` [cached: ${usage.cached_input_tokens}]` : '';
|
|
339
398
|
console.error(`[duul] Token usage: ${usage.input_tokens} in + ${usage.output_tokens} out = ${usage.total_tokens} total (${usage.api_calls} API calls)${cachedStr}${costStr}`);
|
|
399
|
+
// Stateless mode: record this round's user/assistant turns so the reviewer
|
|
400
|
+
// can replay them next round (the ChatGPT backend has no native chaining).
|
|
401
|
+
// Only the final Q&A is kept — replaying every tool call would bloat tokens
|
|
402
|
+
// and risks stale encrypted-reasoning items across separate responses.
|
|
403
|
+
const buildTurns = (assistantText) => this.stateless
|
|
404
|
+
? [
|
|
405
|
+
...(conversationHistory ?? []),
|
|
406
|
+
{ role: 'user', content: [{ type: 'input_text', text: userMessage }] },
|
|
407
|
+
{ role: 'assistant', content: [{ type: 'output_text', text: assistantText }] },
|
|
408
|
+
]
|
|
409
|
+
: undefined;
|
|
340
410
|
// Extract structured output
|
|
411
|
+
const outputText = this.getOutputText(response);
|
|
341
412
|
const parsed = this.extractStructuredOutput(response, outputSchema);
|
|
342
413
|
if (parsed !== null) {
|
|
343
|
-
return { parsed, reviewId: response.id, usage };
|
|
414
|
+
return { parsed, reviewId: response.id, usage, conversationTurns: buildTurns(outputText ?? '') };
|
|
344
415
|
}
|
|
345
416
|
if (options.createFallback) {
|
|
346
417
|
const reason = this.hasPendingFunctionCalls(response) ? 'round_limit' : 'budget';
|
|
347
418
|
const fallback = options.createFallback(reason, allUsedTools);
|
|
348
419
|
console.error(`[duul] Returning structured fallback (reason: ${reason}).`);
|
|
349
|
-
return { parsed: fallback, reviewId: response.id, usage };
|
|
420
|
+
return { parsed: fallback, reviewId: response.id, usage, conversationTurns: buildTurns(outputText ?? JSON.stringify(fallback)) };
|
|
350
421
|
}
|
|
351
422
|
throw new Error('Review failed: could not obtain structured verdict after tool loop.');
|
|
352
423
|
}
|
|
353
424
|
async apiCallWithRetry(params) {
|
|
425
|
+
let refreshedOnce = false;
|
|
354
426
|
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
|
|
355
427
|
const controller = new AbortController();
|
|
356
428
|
const timeout = setTimeout(() => controller.abort(), 120_000);
|
|
357
429
|
try {
|
|
358
|
-
|
|
430
|
+
let response;
|
|
431
|
+
if (this.stateless) {
|
|
432
|
+
// ChatGPT backend requires streaming and leaves `response.completed`'s
|
|
433
|
+
// `output` empty — aggregate items from the streamed events instead.
|
|
434
|
+
const stream = this.client.responses.stream(params, { signal: controller.signal });
|
|
435
|
+
response = await this.aggregateStream(stream);
|
|
436
|
+
}
|
|
437
|
+
else {
|
|
438
|
+
response = (await this.client.responses.create({ ...params, stream: false }, { signal: controller.signal }));
|
|
439
|
+
}
|
|
359
440
|
clearTimeout(timeout);
|
|
360
441
|
return response;
|
|
361
442
|
}
|
|
362
443
|
catch (error) {
|
|
363
444
|
clearTimeout(timeout);
|
|
364
|
-
const
|
|
445
|
+
const status = error instanceof Error && 'status' in error ? error.status : undefined;
|
|
446
|
+
// ChatGPT token expired mid-review: refresh once and retry immediately.
|
|
447
|
+
if (status === 401 && this.refresh && !refreshedOnce) {
|
|
448
|
+
refreshedOnce = true;
|
|
449
|
+
try {
|
|
450
|
+
const token = await this.refresh();
|
|
451
|
+
this.client = this.buildClient(token);
|
|
452
|
+
console.error('[duul] Refreshed Codex token after 401, retrying');
|
|
453
|
+
attempt--; // don't consume a retry for the refresh
|
|
454
|
+
continue;
|
|
455
|
+
}
|
|
456
|
+
catch (refreshError) {
|
|
457
|
+
console.error(`[duul] Codex token refresh failed: ${refreshError instanceof Error ? refreshError.message : refreshError}`);
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
const isRetryable = error instanceof Error && (status !== undefined ? (status === 429 || status >= 500) : error.name === 'AbortError');
|
|
365
461
|
if (isRetryable && attempt < MAX_RETRIES - 1) {
|
|
366
462
|
const delay = 1000 * Math.pow(2, attempt);
|
|
367
463
|
console.error(`[duul] Retry ${attempt + 1}/${MAX_RETRIES} after ${delay}ms`);
|
|
@@ -373,6 +469,53 @@ export class OpenAIProvider {
|
|
|
373
469
|
}
|
|
374
470
|
throw new Error('Unreachable: exhausted retries');
|
|
375
471
|
}
|
|
472
|
+
/**
|
|
473
|
+
* Aggregate a streamed Responses call into a Response object.
|
|
474
|
+
*
|
|
475
|
+
* The ChatGPT backend delivers completed output items via
|
|
476
|
+
* `response.output_item.done` events and returns an EMPTY `output` array on
|
|
477
|
+
* `response.completed`, so we collect items from the stream ourselves. Usage
|
|
478
|
+
* and id come from `response.completed` (falling back to `response.created`).
|
|
479
|
+
*/
|
|
480
|
+
async aggregateStream(stream) {
|
|
481
|
+
const output = [];
|
|
482
|
+
let id = '';
|
|
483
|
+
let usage;
|
|
484
|
+
for await (const event of stream) {
|
|
485
|
+
switch (event.type) {
|
|
486
|
+
case 'response.created':
|
|
487
|
+
id = event.response.id;
|
|
488
|
+
break;
|
|
489
|
+
case 'response.output_item.done':
|
|
490
|
+
output.push(event.item);
|
|
491
|
+
break;
|
|
492
|
+
case 'response.completed':
|
|
493
|
+
id = event.response.id ?? id;
|
|
494
|
+
usage = event.response.usage;
|
|
495
|
+
break;
|
|
496
|
+
case 'response.failed':
|
|
497
|
+
throw new Error(`ChatGPT backend response failed: ${event.response.error?.message ?? 'unknown error'}`);
|
|
498
|
+
case 'error':
|
|
499
|
+
throw new Error(`ChatGPT backend stream error: ${event.message ?? 'unknown error'}`);
|
|
500
|
+
default:
|
|
501
|
+
break;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
return { id, output, usage };
|
|
505
|
+
}
|
|
506
|
+
/** Return the first output_text string in the response, or null. */
|
|
507
|
+
getOutputText(response) {
|
|
508
|
+
for (const item of response.output) {
|
|
509
|
+
if (item.type === 'message' && 'content' in item) {
|
|
510
|
+
const msg = item;
|
|
511
|
+
for (const content of msg.content) {
|
|
512
|
+
if (content.type === 'output_text' && content.text)
|
|
513
|
+
return content.text;
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
return null;
|
|
518
|
+
}
|
|
376
519
|
extractStructuredOutput(response, outputSchema) {
|
|
377
520
|
for (const item of response.output) {
|
|
378
521
|
if (item.type === 'message' && 'content' in item) {
|
|
@@ -56,8 +56,14 @@ export interface ProviderCapabilities {
|
|
|
56
56
|
structuredOutputs: boolean;
|
|
57
57
|
/** Supports tool/function calling */
|
|
58
58
|
toolCalling: boolean;
|
|
59
|
-
/** Supports
|
|
59
|
+
/** Supports NATIVE server-side conversation chaining via previous_response_id */
|
|
60
60
|
previousResponseId: boolean;
|
|
61
|
+
/**
|
|
62
|
+
* Continuity is achieved by replaying prior turns (conversationHistory) rather
|
|
63
|
+
* than native server-side chaining. When true, the reviewer stores/loads
|
|
64
|
+
* conversation turns per reviewId and passes them back on the next round.
|
|
65
|
+
*/
|
|
66
|
+
conversationReplay: boolean;
|
|
61
67
|
/** Supports strict JSON schema mode */
|
|
62
68
|
jsonSchemaStrict: boolean;
|
|
63
69
|
}
|
|
@@ -34,3 +34,11 @@ export declare function computeIterationMeta(phase: ReviewPhase, callerIteration
|
|
|
34
34
|
* (last allowed iteration).
|
|
35
35
|
*/
|
|
36
36
|
export declare function isIterationLimitExceeded(phase: ReviewPhase, callerIterationCount?: number, requestMaxOverride?: number): boolean;
|
|
37
|
+
/**
|
|
38
|
+
* Emit a soft cost warning once iteration_count crosses ~60% of the limit.
|
|
39
|
+
* Uses the current round's estimated cost as a rough per-round figure so the
|
|
40
|
+
* orchestrator can decide whether to accept a near-verdict or escalate.
|
|
41
|
+
*
|
|
42
|
+
* Returns null when below the threshold, or when iteration_count is 0.
|
|
43
|
+
*/
|
|
44
|
+
export declare function computeCostWarning(iterMeta: IterationMeta, estimatedCostUsd: number | null): string | null;
|
|
@@ -63,3 +63,24 @@ export function isIterationLimitExceeded(phase, callerIterationCount, requestMax
|
|
|
63
63
|
const limit = getIterationLimit(phase, requestMaxOverride);
|
|
64
64
|
return callerIterationCount > limit;
|
|
65
65
|
}
|
|
66
|
+
/** Fraction of the iteration limit at which the soft cost warning starts firing. */
const COST_WARNING_RATIO = 0.6;
/**
 * Emit a soft cost warning once iteration_count reaches ~60% of the limit.
 *
 * The current round's estimated cost is quoted as a rough per-round figure so
 * the orchestrator can decide whether to accept a near-verdict or escalate.
 *
 * @param {{iteration_count: number, iteration_limit: number}} iterMeta
 *   iteration bookkeeping for the current review.
 * @param {(number|null)} estimatedCostUsd - estimated cost of this round, or
 *   null when unknown.
 * @returns {(string|null)} the warning text, or null when iteration_count is
 *   not a positive number, the limit is missing/non-finite, or the count is
 *   still below the threshold.
 */
export function computeCostWarning(iterMeta, estimatedCostUsd) {
    const { iteration_count: count, iteration_limit: limit } = iterMeta;
    // `!(count > 0)` also rejects undefined/NaN, which `count <= 0` let through.
    if (!(count > 0)) {
        return null;
    }
    // Without a usable limit the threshold would be NaN and the message would
    // read "of undefined"; stay silent instead of emitting a misleading warning.
    if (!Number.isFinite(limit)) {
        return null;
    }
    const trigger = Math.ceil(limit * COST_WARNING_RATIO);
    if (count < trigger) {
        return null;
    }
    // Only quote a price when it is a real, positive, finite number.
    const hasKnownCost = typeof estimatedCostUsd === 'number'
        && Number.isFinite(estimatedCostUsd)
        && estimatedCostUsd > 0;
    const costStr = hasKnownCost ? `~$${estimatedCostUsd.toFixed(4)}` : 'an unknown amount';
    return (`This is iteration ${count} of ${limit}. ` +
        `Each round costs ${costStr}. ` +
        `Consider accepting REVISE-with-minor-issues or escalating to human.`);
}
|
|
@@ -4,8 +4,14 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import type { z } from 'zod';
|
|
6
6
|
import type { WorkspaceScope } from './filesystem.js';
|
|
7
|
-
import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage } from './providers/types.js';
|
|
7
|
+
import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage, ConversationTurn } from './providers/types.js';
|
|
8
8
|
export type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage };
|
|
9
|
+
export type ReviewToolName = 'plan' | 'code' | 'partition';
|
|
10
|
+
type ReviewerModel = string | {
|
|
11
|
+
plan?: string;
|
|
12
|
+
code?: string;
|
|
13
|
+
partition?: string;
|
|
14
|
+
};
|
|
9
15
|
export interface ReviewOptions<T extends z.ZodType> {
|
|
10
16
|
systemPrompt: string;
|
|
11
17
|
userMessage: string;
|
|
@@ -13,9 +19,10 @@ export interface ReviewOptions<T extends z.ZodType> {
|
|
|
13
19
|
outputSchema: T;
|
|
14
20
|
workspaceScope?: WorkspaceScope | null;
|
|
15
21
|
previousReviewId?: string;
|
|
22
|
+
toolName?: ReviewToolName;
|
|
16
23
|
reviewerConfig?: {
|
|
17
24
|
provider?: string;
|
|
18
|
-
model?:
|
|
25
|
+
model?: ReviewerModel;
|
|
19
26
|
base_url?: string;
|
|
20
27
|
api_key?: string;
|
|
21
28
|
temperature?: number;
|
|
@@ -23,6 +30,31 @@ export interface ReviewOptions<T extends z.ZodType> {
|
|
|
23
30
|
};
|
|
24
31
|
createFallback?: (reason: ExhaustionReason, usedTools: string[]) => z.infer<T>;
|
|
25
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Resolve a concrete model string from either the flat string form or
|
|
35
|
+
* the per-tool object form. Returns undefined when nothing is set so the
|
|
36
|
+
* provider falls back to env/default.
|
|
37
|
+
*/
|
|
38
|
+
export declare function resolveModelForTool(model: ReviewerModel | undefined, toolName: ReviewToolName | undefined): string | undefined;
|
|
39
|
+
/**
|
|
40
|
+
* Decide how to handle cross-round continuity for a provider, given whether the
|
|
41
|
+
* caller supplied a previousReviewId. Pure function so it can be unit-tested.
|
|
42
|
+
*
|
|
43
|
+
* - `shouldLoad`: replay-based providers need prior turns loaded and passed in.
|
|
44
|
+
* - `shouldWarn`: the caller asked for continuity but the provider supports
|
|
45
|
+
* neither native chaining nor replay, so context will be lost.
|
|
46
|
+
*/
|
|
47
|
+
export declare function continuityPlan(capabilities: {
|
|
48
|
+
previousResponseId: boolean;
|
|
49
|
+
conversationReplay: boolean;
|
|
50
|
+
}, hasPreviousReviewId: boolean): {
|
|
51
|
+
shouldLoad: boolean;
|
|
52
|
+
shouldWarn: boolean;
|
|
53
|
+
};
|
|
54
|
+
/** Reset the in-memory conversation store. Test-only. */
|
|
55
|
+
export declare function __resetConversationStoreForTest(): void;
|
|
56
|
+
export declare function getConversationHistory(reviewId: string, workspaceRoot?: string): Promise<ConversationTurn[] | undefined>;
|
|
57
|
+
export declare function storeConversation(reviewId: string, turns: ConversationTurn[], workspaceRoot?: string): Promise<void>;
|
|
26
58
|
/**
|
|
27
59
|
* Main entry point for all review calls.
|
|
28
60
|
* Resolves provider from config, delegates the call.
|
|
@@ -3,6 +3,21 @@ import { join, dirname } from 'node:path';
|
|
|
3
3
|
import { OpenAIProvider } from './providers/openai.js';
|
|
4
4
|
import { AnthropicProvider } from './providers/anthropic.js';
|
|
5
5
|
import { GoogleProvider } from './providers/google.js';
|
|
6
|
+
import { resolveCodexCredential } from './providers/codex-auth.js';
|
|
7
|
+
/**
 * Resolve a concrete model string from either the flat string form or the
 * per-tool object form (`{ plan, code, partition }`).
 *
 * @param {(string|object|null|undefined)} model - flat model name or per-tool map.
 * @param {(string|undefined)} toolName - tool whose model should be looked up
 *   when `model` is an object.
 * @returns {(string|undefined)} the resolved model, or undefined when nothing
 *   usable is set so the provider can fall back to env/default.
 */
export function resolveModelForTool(model, toolName) {
    // `== null` covers both undefined and null; a JSON config can carry
    // `"model": null`, which previously crashed on the property lookup below.
    if (model == null) {
        return undefined;
    }
    if (typeof model === 'string') {
        return model;
    }
    if (typeof model !== 'object' || !toolName) {
        return undefined;
    }
    // Only accept string entries so inherited members (e.g. `constructor`) or
    // null placeholders are never handed to a provider as a model name.
    const perToolModel = model[toolName];
    return typeof perToolModel === 'string' ? perToolModel : undefined;
}
|
|
6
21
|
/**
|
|
7
22
|
* Resolve the effective provider name from config and env vars.
|
|
8
23
|
* Priority: per-request config > env REVIEW_PROVIDER > "openai"
|
|
@@ -52,26 +67,50 @@ function apiKeyFingerprint(key) {
|
|
|
52
67
|
return key;
|
|
53
68
|
return `${key.slice(0, 4)}...${key.slice(-4)}`;
|
|
54
69
|
}
|
|
55
|
-
function getProviderCacheKey(provider, config) {
|
|
70
|
+
/**
 * Build the provider-cache key for a provider/model/config combination.
 *
 * The key is the JSON serialization of the provider name, the resolved model,
 * the request's base_url/temperature/top_p, and a fingerprint of the API key
 * (per-request key if given, otherwise whatever resolveApiKey returns).
 *
 * @param {string} provider - provider name.
 * @param {(string|undefined)} resolvedModel - model after per-tool resolution.
 * @param {object} [config] - per-request reviewer config.
 * @returns {string} the cache key.
 */
function getProviderCacheKey(provider, resolvedModel, config) {
    const effectiveKey = config?.api_key ?? resolveApiKey(provider);
    const keyParts = {
        provider,
        model: resolvedModel,
        base_url: config?.base_url,
        temperature: config?.temperature,
        top_p: config?.top_p,
        key_fp: apiKeyFingerprint(effectiveKey),
    };
    return JSON.stringify(keyParts);
}
|
|
81
|
+
/**
 * Pick the credential to use for the OpenAI provider.
 *
 * An explicit per-request key or OPENAI_API_KEY wins. Otherwise the result of
 * resolveCodexCredential() is consulted: an 'apikey'-mode credential yields
 * `{ apiKey }`, any other credential yields `{ chatgpt: { accessToken,
 * accountId, refresh } }`, and no credential at all yields `{}`.
 *
 * @param {(string|undefined)} configApiKey - per-request API key, if any.
 * @returns {Promise<object>} `{ apiKey }`, `{ chatgpt }`, or `{}`.
 */
async function resolveOpenAiCredential(configApiKey) {
    const directKey = configApiKey ?? process.env.OPENAI_API_KEY;
    if (directKey) {
        return { apiKey: directKey };
    }
    const codexCred = await resolveCodexCredential();
    if (!codexCred) {
        // Empty result: let the provider throw its standard "no credential" error.
        return {};
    }
    const isApiKeyLogin = codexCred.mode === 'apikey';
    if (!isApiKeyLogin) {
        console.error('[duul] Using Sign in with ChatGPT credentials from Codex CLI login');
        const chatgpt = {
            accessToken: codexCred.accessToken,
            accountId: codexCred.accountId,
            refresh: codexCred.refresh,
        };
        return { chatgpt };
    }
    console.error('[duul] Using OpenAI API key from Codex CLI login (~/.codex/auth.json)');
    return { apiKey: codexCred.apiKey };
}
|
|
66
100
|
/**
|
|
67
101
|
* Create or retrieve a cached provider instance.
|
|
102
|
+
*
|
|
103
|
+
* `toolName` lets callers use the per-tool model override form:
|
|
104
|
+
* `{ plan: "...", code: "...", partition: "..." }`. The resolved model
|
|
105
|
+
* participates in the cache key so per-tool models don't collide.
|
|
68
106
|
*/
|
|
69
|
-
function getProvider(reviewerConfig) {
|
|
107
|
+
async function getProvider(reviewerConfig, toolName) {
|
|
70
108
|
const providerName = resolveProviderName(reviewerConfig?.provider);
|
|
71
109
|
const hasEphemeralKey = !!reviewerConfig?.api_key;
|
|
110
|
+
const resolvedModel = resolveModelForTool(reviewerConfig?.model, toolName);
|
|
72
111
|
// Per-request api_key → skip cache (ephemeral credential, don't leak into shared cache)
|
|
73
112
|
if (!hasEphemeralKey) {
|
|
74
|
-
const cacheKey = getProviderCacheKey(providerName, reviewerConfig);
|
|
113
|
+
const cacheKey = getProviderCacheKey(providerName, resolvedModel, reviewerConfig);
|
|
75
114
|
if (providerCache.has(cacheKey)) {
|
|
76
115
|
return providerCache.get(cacheKey);
|
|
77
116
|
}
|
|
@@ -80,15 +119,21 @@ function getProvider(reviewerConfig) {
|
|
|
80
119
|
const constructorConfig = {
|
|
81
120
|
apiKey,
|
|
82
121
|
baseUrl: reviewerConfig?.base_url,
|
|
83
|
-
model:
|
|
122
|
+
model: resolvedModel,
|
|
84
123
|
temperature: reviewerConfig?.temperature,
|
|
85
124
|
topP: reviewerConfig?.top_p,
|
|
86
125
|
};
|
|
87
126
|
let provider;
|
|
127
|
+
// ChatGPT-login providers hold a rotating bearer token — never cache them.
|
|
128
|
+
let bypassCache = hasEphemeralKey;
|
|
88
129
|
switch (providerName) {
|
|
89
|
-
case 'openai':
|
|
90
|
-
|
|
130
|
+
case 'openai': {
|
|
131
|
+
const cred = await resolveOpenAiCredential(reviewerConfig?.api_key);
|
|
132
|
+
if (cred.chatgpt)
|
|
133
|
+
bypassCache = true;
|
|
134
|
+
provider = new OpenAIProvider({ ...constructorConfig, apiKey: cred.apiKey ?? apiKey, chatgpt: cred.chatgpt });
|
|
91
135
|
break;
|
|
136
|
+
}
|
|
92
137
|
case 'anthropic':
|
|
93
138
|
provider = new AnthropicProvider(constructorConfig);
|
|
94
139
|
break;
|
|
@@ -110,18 +155,18 @@ function getProvider(reviewerConfig) {
|
|
|
110
155
|
default:
|
|
111
156
|
throw new Error(`Unknown provider: ${providerName}`);
|
|
112
157
|
}
|
|
113
|
-
// Only cache env-based providers (not ephemeral
|
|
114
|
-
if (!
|
|
158
|
+
// Only cache stable env-based providers (not ephemeral keys or rotating tokens)
|
|
159
|
+
if (!bypassCache) {
|
|
115
160
|
// Evict oldest entry if cache is full
|
|
116
161
|
if (providerCache.size >= MAX_CACHE_SIZE) {
|
|
117
162
|
const oldestKey = providerCache.keys().next().value;
|
|
118
163
|
providerCache.delete(oldestKey);
|
|
119
164
|
console.error(`[duul] Provider cache full, evicted oldest entry`);
|
|
120
165
|
}
|
|
121
|
-
const cacheKey = getProviderCacheKey(providerName, reviewerConfig);
|
|
166
|
+
const cacheKey = getProviderCacheKey(providerName, resolvedModel, reviewerConfig);
|
|
122
167
|
providerCache.set(cacheKey, provider);
|
|
123
168
|
}
|
|
124
|
-
console.error(`[duul] Created ${providerName} provider (model: ${
|
|
169
|
+
console.error(`[duul] Created ${providerName} provider (model: ${resolvedModel ?? 'default'}${toolName ? `, tool: ${toolName}` : ''}${bypassCache ? ', uncached' : ''})`);
|
|
125
170
|
return provider;
|
|
126
171
|
}
|
|
127
172
|
// --- Conversation history store (disk-persisted per workspace) ---
|
|
@@ -142,6 +187,11 @@ function conversationsPath(workspaceRoot) {
|
|
|
142
187
|
async function loadFromDisk(workspaceRoot) {
|
|
143
188
|
if (diskLoaded && lastWorkspaceRoot === workspaceRoot)
|
|
144
189
|
return;
|
|
190
|
+
// Switching workspaces: drop the previous workspace's entries so they aren't
|
|
191
|
+
// flushed into (or replayed from) the new workspace's conversations file.
|
|
192
|
+
if (lastWorkspaceRoot !== null && lastWorkspaceRoot !== workspaceRoot) {
|
|
193
|
+
memoryCache.clear();
|
|
194
|
+
}
|
|
145
195
|
lastWorkspaceRoot = workspaceRoot;
|
|
146
196
|
diskLoaded = true;
|
|
147
197
|
try {
|
|
@@ -189,7 +239,29 @@ function evictOldest() {
|
|
|
189
239
|
console.error(`[duul] Conversation store full, evicted oldest entry`);
|
|
190
240
|
}
|
|
191
241
|
}
|
|
192
|
-
|
|
242
|
+
/**
 * Decide how to handle cross-round continuity for a provider, given whether
 * the caller supplied a previousReviewId. Pure function, no side effects.
 *
 * @param {{previousResponseId?: boolean, conversationReplay?: boolean}} capabilities
 *   provider capability flags; a missing flag is treated as unsupported.
 * @param {boolean} hasPreviousReviewId - whether the caller asked to continue
 *   an earlier review.
 * @returns {{shouldLoad: boolean, shouldWarn: boolean}}
 *   `shouldLoad` is true when prior turns must be loaded and replayed;
 *   `shouldWarn` is true when continuity was requested but the provider
 *   supports neither native chaining nor replay.
 */
export function continuityPlan(capabilities, hasPreviousReviewId) {
    if (!hasPreviousReviewId) {
        return { shouldLoad: false, shouldWarn: false };
    }
    // Coerce explicitly so a capabilities object that omits a flag still
    // yields strict booleans instead of leaking `undefined` to callers.
    const canReplay = Boolean(capabilities.conversationReplay);
    const canChain = Boolean(capabilities.previousResponseId);
    return {
        shouldLoad: canReplay,
        shouldWarn: !canChain && !canReplay,
    };
}
|
|
258
|
+
/**
 * Test-only helper: return the conversation store to its initial state by
 * forgetting the last workspace root, clearing the disk-loaded flag, and
 * emptying the in-memory cache.
 */
export function __resetConversationStoreForTest() {
    lastWorkspaceRoot = null;
    diskLoaded = false;
    memoryCache.clear();
}
|
|
264
|
+
export async function getConversationHistory(reviewId, workspaceRoot) {
|
|
193
265
|
if (workspaceRoot)
|
|
194
266
|
await loadFromDisk(workspaceRoot);
|
|
195
267
|
const entry = memoryCache.get(reviewId);
|
|
@@ -198,7 +270,7 @@ async function getConversationHistory(reviewId, workspaceRoot) {
|
|
|
198
270
|
entry.lastAccessed = Date.now();
|
|
199
271
|
return entry.turns;
|
|
200
272
|
}
|
|
201
|
-
async function storeConversation(reviewId, turns, workspaceRoot) {
|
|
273
|
+
export async function storeConversation(reviewId, turns, workspaceRoot) {
|
|
202
274
|
evictOldest();
|
|
203
275
|
memoryCache.set(reviewId, { turns, lastAccessed: Date.now() });
|
|
204
276
|
if (workspaceRoot) {
|
|
@@ -210,21 +282,23 @@ async function storeConversation(reviewId, turns, workspaceRoot) {
|
|
|
210
282
|
* Resolves provider from config, delegates the call.
|
|
211
283
|
*/
|
|
212
284
|
export async function callReview(options) {
|
|
213
|
-
const provider = getProvider(options.reviewerConfig);
|
|
285
|
+
const provider = await getProvider(options.reviewerConfig, options.toolName);
|
|
214
286
|
// Log capability warnings for non-full-featured providers
|
|
215
287
|
if (!provider.capabilities.toolCalling && options.workspaceScope?.root) {
|
|
216
288
|
console.error(`[duul] Warning: ${provider.name} provider does not support tool calling. ` +
|
|
217
289
|
'Reviewer will not be able to explore the workspace. Consider providing more context via relevant_code/artifact_refs.');
|
|
218
290
|
}
|
|
219
|
-
|
|
220
|
-
|
|
291
|
+
const plan = continuityPlan(provider.capabilities, !!options.previousReviewId);
|
|
292
|
+
if (plan.shouldWarn) {
|
|
293
|
+
console.error(`[duul] Warning: ${provider.name} provider does not support conversation continuity. ` +
|
|
221
294
|
'Reviewer context from previous rounds will not be available.');
|
|
222
295
|
}
|
|
223
296
|
const workspaceRoot = options.workspaceScope?.root;
|
|
224
|
-
// Retrieve conversation history for providers
|
|
225
|
-
// OpenAI
|
|
297
|
+
// Retrieve conversation history for replay-based providers (Anthropic, and the
|
|
298
|
+
// OpenAI ChatGPT-login backend). Native-chaining providers (OpenAI api-key)
|
|
299
|
+
// pass previousReviewId straight through and don't need replay.
|
|
226
300
|
let conversationHistory;
|
|
227
|
-
if (
|
|
301
|
+
if (plan.shouldLoad) {
|
|
228
302
|
conversationHistory = await getConversationHistory(options.previousReviewId, workspaceRoot);
|
|
229
303
|
if (conversationHistory) {
|
|
230
304
|
console.error(`[duul] Loaded conversation history for ${options.previousReviewId} (${conversationHistory.length} turns)`);
|
|
@@ -234,8 +308,8 @@ export async function callReview(options) {
|
|
|
234
308
|
}
|
|
235
309
|
}
|
|
236
310
|
const result = await provider.review({ ...options, conversationHistory });
|
|
237
|
-
// Store conversation turns for future rounds (
|
|
238
|
-
if (result.conversationTurns?.length && provider.
|
|
311
|
+
// Store conversation turns for future rounds (replay-based providers only)
|
|
312
|
+
if (result.conversationTurns?.length && provider.capabilities.conversationReplay) {
|
|
239
313
|
await storeConversation(result.reviewId, result.conversationTurns, workspaceRoot);
|
|
240
314
|
console.error(`[duul] Stored conversation (${result.conversationTurns.length} turns) for ${result.reviewId}`);
|
|
241
315
|
}
|