@planningo/duul 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,6 +36,26 @@ export declare function resolveWorkspaceScope(input: {
36
36
  tracked_only?: boolean;
37
37
  }): WorkspaceScope | null;
38
38
  export declare function readProjectFile(projectRoot: string, filePath: string, scope?: WorkspaceScope | null): Promise<string>;
39
+ /**
40
+ * Resolve a large text argument that may be supplied inline OR via a file path.
41
+ *
42
+ * Large MCP tool arguments (full plan/code markdown) are the single input that
43
+ * tool-calling models most often fail to serialize: when the intended value is
44
+ * big, the model can emit an empty `{}` for the whole argument object, which the
45
+ * MCP SDK then rejects with a `-32602` validation error — and the caller loops.
46
+ * Writing the content to a file with a normal Write call (a small, reliable
47
+ * tool schema) and passing a short relative path here sidesteps that failure.
48
+ *
49
+ * Resolves to `inline` when it is a non-blank string; otherwise to the contents of
50
+ * `file` (read via readProjectFile under `scope`); otherwise to undefined. Rejects
51
+ * when `file` is set but `scope` is null, or when reading the file fails.
52
+ */
53
+ export declare function resolveInlineOrFile(opts: {
54
+ inline?: string | null;
55
+ file?: string | null;
56
+ scope: WorkspaceScope | null;
57
+ label: string;
58
+ }): Promise<string | undefined>;
39
59
  export declare function listProjectDirectory(projectRoot: string, dirPath: string, scope?: WorkspaceScope | null): Promise<string>;
40
60
  /**
41
61
  * Search for a pattern in files using rg (preferred), git grep, or grep fallback.
@@ -71,23 +71,27 @@ async function safePath(projectRoot, requestedPath, workingDirectories) {
71
71
  if (realRel.startsWith('..')) {
72
72
  throw new Error(`Symlink escape detected: ${requestedPath} resolves outside project root`);
73
73
  }
74
- // Block sensitive files
75
- const lower = rel.toLowerCase();
76
- if (lower.includes('.env') && !lower.endsWith('.example')) {
77
- throw new Error(`Access denied (sensitive file): ${requestedPath}`);
78
- }
79
- if (rel === 'node_modules' || rel.startsWith('node_modules/') || rel.startsWith('node_modules\\')) {
80
- throw new Error('Access denied: node_modules');
81
- }
82
- // Block additional paths (.git, build, dist)
83
- const topSegment = rel.split('/')[0].split('\\')[0];
84
- if (BLOCKED_PATHS.includes(topSegment)) {
85
- throw new Error(`Access denied: ${topSegment}`);
86
- }
87
- // Block large file extensions (.log)
88
- if (BLOCKED_EXTENSIONS.some((ext) => lower.endsWith(ext))) {
89
- throw new Error(`Access denied (blocked extension): ${requestedPath}`);
90
- }
74
+ // Block sensitive files — checked against BOTH the logical requested path and
75
+ // the symlink-resolved real path, so an in-root symlink with an innocuous name
76
+ // cannot point at an in-root secret (e.g. innocent.txt -> .env, gitcfg.txt -> .git/config).
77
+ const assertNotSensitive = (candidate) => {
78
+ const low = candidate.toLowerCase();
79
+ if (low.includes('.env') && !low.endsWith('.example')) {
80
+ throw new Error(`Access denied (sensitive file): ${requestedPath}`);
81
+ }
82
+ if (candidate === 'node_modules' || candidate.startsWith('node_modules/') || candidate.startsWith('node_modules\\')) {
83
+ throw new Error('Access denied: node_modules');
84
+ }
85
+ const topSegment = candidate.split('/')[0].split('\\')[0];
86
+ if (BLOCKED_PATHS.includes(topSegment)) {
87
+ throw new Error(`Access denied: ${topSegment}`);
88
+ }
89
+ if (BLOCKED_EXTENSIONS.some((ext) => low.endsWith(ext))) {
90
+ throw new Error(`Access denied (blocked extension): ${requestedPath}`);
91
+ }
92
+ };
93
+ assertNotSensitive(rel);
94
+ assertNotSensitive(realRel);
91
95
  return realResolved;
92
96
  }
93
97
  /**
@@ -247,6 +251,36 @@ export async function readProjectFile(projectRoot, filePath, scope) {
247
251
  }
248
252
  return readFile(resolved, 'utf-8');
249
253
  }
254
+ /**
255
+ * Resolve a large text argument that may be supplied inline OR via a file path.
256
+ *
257
+ * Large MCP tool arguments (full plan/code markdown) are the single input that
258
+ * tool-calling models most often fail to serialize: when the intended value is
259
+ * big, the model can emit an empty `{}` for the whole argument object, which the
260
+ * MCP SDK then rejects with a `-32602` validation error — and the caller loops.
261
+ * Writing the content to a file with a normal Write call (a small, reliable
262
+ * tool schema) and passing a short relative path here sidesteps that failure.
263
+ *
264
+ * Returns the resolved text, or undefined when neither source yields content.
265
+ * Throws (with a labelled message) only when a file path was given but could
266
+ * not be read — the tool handler converts that into actionable retry guidance.
267
+ */
268
+ export async function resolveInlineOrFile(opts) {
269
+ const { inline, file, scope, label } = opts;
270
+ if (typeof inline === 'string' && inline.trim().length > 0) {
271
+ return inline;
272
+ }
273
+ if (typeof file === 'string' && file.trim().length > 0) {
274
+ if (!scope) {
275
+ throw new Error(`${label}_file was provided ("${file}") but no workspace_root (or project_root) is set. ` +
276
+ 'Pass workspace_root so the file can be read.');
277
+ }
278
+ // Bypass tracked_only: the *_file artifact is the caller's own scratch file
279
+ // (e.g. .duul/plan.md), which is typically untracked by git.
280
+ return readProjectFile(scope.root, file, { ...scope, trackedOnly: false });
281
+ }
282
+ return undefined;
283
+ }
250
284
  export async function listProjectDirectory(projectRoot, dirPath, scope) {
251
285
  const resolved = await resolveToolPath(projectRoot, dirPath, scope ?? null);
252
286
  const stats = await lstat(resolved);
@@ -1,5 +1,5 @@
1
1
  import { validateProjectRoot } from '../filesystem.js';
2
- import { executeFilesystemTool } from '../filesystem-tools.js';
2
+ import { executeFilesystemTool, createReviewerByteBudget } from '../filesystem-tools.js';
3
3
  import { estimateCost } from '../pricing.js';
4
4
  const MAX_INPUT_CHARS = 400_000;
5
5
  const MAX_TOOL_ROUNDS = 10;
@@ -129,7 +129,8 @@ export class AnthropicProvider {
129
129
  capabilities = {
130
130
  structuredOutputs: false,
131
131
  toolCalling: true,
132
- previousResponseId: true, // simulated via conversation history
132
+ previousResponseId: false, // no native chaining — continuity via replay
133
+ conversationReplay: true,
133
134
  jsonSchemaStrict: false,
134
135
  };
135
136
  apiKey;
@@ -249,6 +250,7 @@ export class AnthropicProvider {
249
250
  };
250
251
  const toolCache = new Map();
251
252
  const callCounts = new Map();
253
+ const byteBudget = createReviewerByteBudget();
252
254
  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
253
255
  const toolUses = body.content.filter((b) => b.type === 'tool_use');
254
256
  if (toolUses.length === 0 || body.stop_reason !== 'tool_use')
@@ -275,7 +277,7 @@ export class AnthropicProvider {
275
277
  toolResults.push({ type: 'tool_result', tool_use_id: call.id, content: budgetMessage(call.name, currentLevel) });
276
278
  continue;
277
279
  }
278
- const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope);
280
+ const result = await executeFilesystemTool(effectiveRoot, call.name, args, workspaceScope, byteBudget);
279
281
  toolCache.set(cacheKey, result);
280
282
  allUsedTools.push(`${call.name}(${argSummary})`);
281
283
  accumulatedToolChars += result.length;
@@ -0,0 +1,51 @@
1
+ /** ChatGPT-login base URL for the Responses API (POST {base}/responses). */
2
+ export declare const CHATGPT_BASE_URL = "https://chatgpt.com/backend-api/codex";
3
/**
 * Token bundle stored under `tokens` in auth.json. `access_token` and
 * `account_id` are what ChatGPT-mode credentials are built from;
 * `refresh_token` is what refreshCodexToken exchanges. All fields are
 * optional because the file may lack any of them.
 */
+ export interface CodexTokens {
4
+ id_token?: string;
5
+ access_token?: string;
6
+ refresh_token?: string;
7
+ account_id?: string;
8
+ }
9
/**
 * Parsed shape of auth.json as returned by loadCodexAuth. `auth_mode` is
 * compared against 'chatgpt' when choosing a credential; `last_refresh` is an
 * ISO timestamp written by refreshCodexToken.
 */
+ export interface CodexAuth {
10
+ auth_mode?: string;
11
+ OPENAI_API_KEY?: string | null;
12
+ tokens?: CodexTokens;
13
+ last_refresh?: string;
14
+ }
15
/**
 * Credential produced by resolveCodexCredential, discriminated by `mode`:
 * 'apikey' carries the stored OPENAI_API_KEY; 'chatgpt' carries the OAuth
 * access token and account id plus a `refresh` callback that rotates the token
 * and resolves to the new access token.
 */
+ export type CodexCredential = {
16
+ mode: 'apikey';
17
+ apiKey: string;
18
+ } | {
19
+ mode: 'chatgpt';
20
+ accessToken: string;
21
+ accountId: string;
22
+ refresh: () => Promise<string>;
23
+ };
24
+ /** Resolve the Codex home directory ($CODEX_HOME or ~/.codex). */
25
+ export declare function codexHome(): string;
26
+ /** Read and parse auth.json. Returns null when the file is missing or unparsable. */
27
+ export declare function loadCodexAuth(): CodexAuth | null;
28
+ /**
29
+ * Decode the `exp` (seconds since epoch) claim from a JWT without verifying it.
30
+ * Returns null when the token is not a decodable JWT.
31
+ */
32
+ export declare function jwtExp(token: string): number | null;
33
+ /**
34
+ * True when the token is expired or within EXPIRY_SKEW_SECONDS of expiring.
35
+ * Unknown expiry is treated as "not expired" so we don't refresh needlessly.
36
+ */
37
+ export declare function isTokenExpired(token: string, nowSeconds?: number): boolean;
38
+ /**
39
+ * Exchange the stored refresh_token for a fresh access token via the OAuth token
40
+ * endpoint and write the merged tokens back to auth.json (a failed write is only logged).
41
+ * Resolves to the updated CodexAuth. Rejects when no refresh_token is stored or the request fails.
42
+ */
43
+ export declare function refreshCodexToken(auth: CodexAuth): Promise<CodexAuth>;
44
+ /**
45
+ * Resolve a usable credential from the Codex CLI login, or null when the CLI
46
+ * is not logged in. Refreshes an expired ChatGPT access token up front.
47
+ *
48
+ * The returned `refresh` callback (chatgpt mode) re-reads auth.json and rotates
49
+ * the token, so a provider can recover from a mid-review 401.
50
+ */
51
+ export declare function resolveCodexCredential(): Promise<CodexCredential | null>;
@@ -0,0 +1,178 @@
1
+ /**
2
+ * Codex CLI login support.
3
+ *
4
+ * Lets DUUL reuse the credentials produced by `codex login` (the OpenAI Codex
5
+ * CLI) instead of requiring a raw OPENAI_API_KEY. Two auth modes are handled:
6
+ *
7
+ * 1. "apikey" — auth.json carries an OPENAI_API_KEY; we just use it.
8
+ * 2. "chatgpt" — Sign in with ChatGPT (Plus/Pro/Team). auth.json carries an
9
+ * OAuth access token + account id. Requests go to the ChatGPT
10
+ * backend Responses endpoint with a bearer token; the token is
11
+ * refreshed via the OpenAI OAuth endpoint when near expiry.
12
+ *
13
+ * Credential file: $CODEX_HOME/auth.json (defaults to ~/.codex/auth.json).
14
+ *
15
+ * Protocol constants mirror the openai/codex `codex-rs` client so DUUL speaks
16
+ * the same dialect the CLI does.
17
+ */
18
+ import { readFileSync, writeFileSync, chmodSync } from 'node:fs';
19
+ import { homedir } from 'node:os';
20
+ import { join } from 'node:path';
21
+ /** ChatGPT-login base URL for the Responses API (POST {base}/responses). */
22
+ export const CHATGPT_BASE_URL = 'https://chatgpt.com/backend-api/codex';
23
+ /** OAuth token endpoint used to refresh a ChatGPT access token. $CODEX_REFRESH_TOKEN_URL_OVERRIDE replaces it when set; `??` only falls back when the variable is unset, so an empty value is used verbatim. */
24
+ const OAUTH_TOKEN_URL = process.env.CODEX_REFRESH_TOKEN_URL_OVERRIDE ?? 'https://auth.openai.com/oauth/token';
25
+ /** Public OAuth client id the Codex CLI registers under; $CODEX_APP_SERVER_LOGIN_CLIENT_ID overrides it when set. */
26
+ const OAUTH_CLIENT_ID = process.env.CODEX_APP_SERVER_LOGIN_CLIENT_ID ?? 'app_EMoamEEZ73f0CkXaXp7hrann';
27
+ /** Refresh the access token when it has this many seconds (or fewer) of life left: 5 * 60 = 300 s. */
28
+ const EXPIRY_SKEW_SECONDS = 5 * 60;
/**
 * Resolve the Codex home directory.
 *
 * @returns $CODEX_HOME when it is set to a non-empty value, otherwise
 *   `~/.codex`. An empty CODEX_HOME is treated as unset: with `??` it would be
 *   returned as `''`, and joining `''` with 'auth.json' yields a path relative
 *   to the current working directory.
 */
export function codexHome() {
    const override = process.env.CODEX_HOME;
    return override ? override : join(homedir(), '.codex');
}
33
+ function authPath() {
34
+ return join(codexHome(), 'auth.json');
35
+ }
36
+ /** Read and parse auth.json. Returns null when the file is missing or unparsable. */
37
+ export function loadCodexAuth() {
38
+ try {
39
+ const raw = readFileSync(authPath(), 'utf-8');
40
+ return JSON.parse(raw);
41
+ }
42
+ catch {
43
+ return null;
44
+ }
45
+ }
46
+ /**
47
+ * Decode the `exp` (seconds since epoch) claim from a JWT without verifying it.
48
+ * Returns null when the token is not a decodable JWT.
49
+ */
50
+ export function jwtExp(token) {
51
+ const parts = token.split('.');
52
+ if (parts.length < 2)
53
+ return null;
54
+ try {
55
+ let payload = parts[1].replace(/-/g, '+').replace(/_/g, '/');
56
+ payload += '='.repeat((4 - (payload.length % 4)) % 4);
57
+ const claims = JSON.parse(Buffer.from(payload, 'base64').toString('utf-8'));
58
+ return typeof claims.exp === 'number' ? claims.exp : null;
59
+ }
60
+ catch {
61
+ return null;
62
+ }
63
+ }
64
+ /**
65
+ * True when the token is expired or within EXPIRY_SKEW_SECONDS of expiring.
66
+ * Unknown expiry is treated as "not expired" so we don't refresh needlessly.
67
+ */
68
+ export function isTokenExpired(token, nowSeconds = Math.floor(Date.now() / 1000)) {
69
+ const exp = jwtExp(token);
70
+ if (exp === null)
71
+ return false;
72
+ return exp - nowSeconds <= EXPIRY_SKEW_SECONDS;
73
+ }
74
+ /**
75
+ * Exchange the stored refresh_token for a fresh access token via the OpenAI
76
+ * OAuth endpoint, then persist the rotated tokens back to auth.json.
77
+ * Returns the updated CodexAuth. Throws on network/HTTP failure.
78
+ */
79
+ export async function refreshCodexToken(auth) {
80
+ const refreshToken = auth.tokens?.refresh_token;
81
+ if (!refreshToken) {
82
+ throw new Error('Codex auth has no refresh_token; run `codex login` again.');
83
+ }
84
+ const res = await fetch(OAUTH_TOKEN_URL, {
85
+ method: 'POST',
86
+ headers: { 'Content-Type': 'application/json' },
87
+ body: JSON.stringify({
88
+ client_id: OAUTH_CLIENT_ID,
89
+ grant_type: 'refresh_token',
90
+ refresh_token: refreshToken,
91
+ }),
92
+ });
93
+ if (!res.ok) {
94
+ const body = await res.text().catch(() => '');
95
+ throw new Error(`Codex token refresh failed (${res.status}): ${body.slice(0, 200)}`);
96
+ }
97
+ const data = (await res.json());
98
+ const updated = {
99
+ ...auth,
100
+ tokens: {
101
+ ...auth.tokens,
102
+ ...(data.access_token ? { access_token: data.access_token } : {}),
103
+ ...(data.id_token ? { id_token: data.id_token } : {}),
104
+ // Refresh tokens rotate; keep the old one only if none is returned.
105
+ ...(data.refresh_token ? { refresh_token: data.refresh_token } : {}),
106
+ },
107
+ last_refresh: new Date().toISOString(),
108
+ };
109
+ try {
110
+ const path = authPath();
111
+ writeFileSync(path, JSON.stringify(updated, null, 2), { mode: 0o600 });
112
+ // `mode` only applies when the file is created; force 0600 on overwrite so a
113
+ // pre-existing, loosely-permissioned auth.json can't keep the refreshed token
114
+ // world/group readable.
115
+ chmodSync(path, 0o600);
116
+ }
117
+ catch (error) {
118
+ // Non-fatal: we can still use the refreshed token in-memory this run.
119
+ console.error(`[duul] Warning: could not persist refreshed Codex token: ${error instanceof Error ? error.message : error}`);
120
+ }
121
+ return updated;
122
+ }
123
+ /**
124
+ * Resolve a usable credential from the Codex CLI login, or null when the CLI
125
+ * is not logged in. Refreshes an expired ChatGPT access token up front.
126
+ *
127
+ * The returned `refresh` callback (chatgpt mode) re-reads auth.json and rotates
128
+ * the token, so a provider can recover from a mid-review 401.
129
+ */
130
+ export async function resolveCodexCredential() {
131
+ const auth = loadCodexAuth();
132
+ if (!auth)
133
+ return null;
134
+ const tokens = auth.tokens;
135
+ const chatgptCapable = !!(tokens?.access_token && tokens?.account_id);
136
+ const preferChatgpt = auth.auth_mode === 'chatgpt' || (!auth.OPENAI_API_KEY && chatgptCapable);
137
+ if (preferChatgpt && chatgptCapable) {
138
+ let accessToken = tokens.access_token;
139
+ if (isTokenExpired(accessToken)) {
140
+ // Token expired: refresh if possible, otherwise fall back to a stored API
141
+ // key rather than handing back a credential that will immediately 401.
142
+ if (tokens.refresh_token) {
143
+ try {
144
+ const refreshed = await refreshCodexToken(auth);
145
+ accessToken = refreshed.tokens?.access_token ?? accessToken;
146
+ }
147
+ catch (error) {
148
+ console.error(`[duul] Codex token refresh failed: ${error instanceof Error ? error.message : error}`);
149
+ if (auth.OPENAI_API_KEY)
150
+ return { mode: 'apikey', apiKey: auth.OPENAI_API_KEY };
151
+ throw error;
152
+ }
153
+ }
154
+ else if (auth.OPENAI_API_KEY) {
155
+ return { mode: 'apikey', apiKey: auth.OPENAI_API_KEY };
156
+ }
157
+ // else: no refresh path and no key — proceed with the expired token so the
158
+ // provider surfaces a clear auth error (better than a silent null).
159
+ }
160
+ return {
161
+ mode: 'chatgpt',
162
+ accessToken,
163
+ accountId: tokens.account_id,
164
+ refresh: async () => {
165
+ const current = loadCodexAuth() ?? auth;
166
+ const refreshed = await refreshCodexToken(current);
167
+ const next = refreshed.tokens?.access_token;
168
+ if (!next)
169
+ throw new Error('Codex token refresh returned no access_token');
170
+ return next;
171
+ },
172
+ };
173
+ }
174
+ if (auth.OPENAI_API_KEY) {
175
+ return { mode: 'apikey', apiKey: auth.OPENAI_API_KEY };
176
+ }
177
+ return null;
178
+ }
@@ -1,5 +1,5 @@
1
1
  import { validateProjectRoot } from '../filesystem.js';
2
- import { executeFilesystemTool } from '../filesystem-tools.js';
2
+ import { executeFilesystemTool, createReviewerByteBudget } from '../filesystem-tools.js';
3
3
  import { estimateCost } from '../pricing.js';
4
4
  const MAX_INPUT_CHARS = 400_000;
5
5
  const MAX_TOOL_ROUNDS = 10;
@@ -122,6 +122,7 @@ export class GoogleProvider {
122
122
  structuredOutputs: false,
123
123
  toolCalling: true,
124
124
  previousResponseId: false,
125
+ conversationReplay: false,
125
126
  jsonSchemaStrict: false,
126
127
  };
127
128
  apiKey;
@@ -209,6 +210,7 @@ export class GoogleProvider {
209
210
  };
210
211
  const toolCache = new Map();
211
212
  const callCounts = new Map();
213
+ const byteBudget = createReviewerByteBudget();
212
214
  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
213
215
  const parts = body.candidates?.[0]?.content?.parts ?? [];
214
216
  const functionCalls = parts.filter((p) => 'functionCall' in p);
@@ -239,7 +241,7 @@ export class GoogleProvider {
239
241
  responseParts.push({ functionResponse: { name, response: { output: budgetMessage(name, currentLevel) } } });
240
242
  continue;
241
243
  }
242
- const result = await executeFilesystemTool(effectiveRoot, name, args, workspaceScope);
244
+ const result = await executeFilesystemTool(effectiveRoot, name, args, workspaceScope, byteBudget);
243
245
  toolCache.set(cacheKey, result);
244
246
  allUsedTools.push(`${name}(${argSummary})`);
245
247
  accumulatedToolChars += result.length;
@@ -1,5 +1,15 @@
1
1
  import type { z } from 'zod';
2
2
  import type { ReviewerProvider, ReviewCallOptions, ReviewCallResult, ProviderCapabilities } from './types.js';
3
+ /**
4
+ * ChatGPT-login (Codex CLI) credentials. When present the provider talks to the
5
+ * ChatGPT backend Responses endpoint with a bearer token instead of an API key.
6
+ */
7
+ export interface ChatgptAuth {
8
+ accessToken: string;
9
+ accountId: string;
10
+ /** Rotate the token (e.g. after a 401). Returns a fresh access token. */
11
+ refresh?: () => Promise<string>;
12
+ }
3
13
  export declare class OpenAIProvider implements ReviewerProvider {
4
14
  readonly name = "openai";
5
15
  readonly capabilities: ProviderCapabilities;
@@ -7,15 +17,38 @@ export declare class OpenAIProvider implements ReviewerProvider {
7
17
  private model;
8
18
  private temperature;
9
19
  private topP;
20
+ /**
21
+ * ChatGPT-backend mode. The endpoint is stateless (`store: false`): it does
22
+ * not support `previous_response_id`, `temperature`/`top_p`, or
23
+ * `max_output_tokens`, and it streams. We resend the full input each turn.
24
+ */
25
+ private readonly stateless;
26
+ private readonly baseURL?;
27
+ private readonly defaultHeaders?;
28
+ private readonly refresh?;
29
+ private readonly reasoningEffort;
10
30
  constructor(config?: {
11
31
  apiKey?: string;
12
32
  baseUrl?: string;
13
33
  model?: string;
14
34
  temperature?: number;
15
35
  topP?: number;
36
+ chatgpt?: ChatgptAuth;
16
37
  });
38
+ private buildClient;
17
39
  review<T extends z.ZodType>(options: ReviewCallOptions<T>): Promise<ReviewCallResult<z.infer<T>>>;
18
40
  private apiCallWithRetry;
41
+ /**
42
+ * Aggregate a streamed Responses call into a Response object.
43
+ *
44
+ * The ChatGPT backend delivers completed output items via
45
+ * `response.output_item.done` events and returns an EMPTY `output` array on
46
+ * `response.completed`, so we collect items from the stream ourselves. Usage
47
+ * and id come from `response.completed` (falling back to `response.created`).
48
+ */
49
+ private aggregateStream;
50
+ /** Return the first output_text string in the response, or null. */
51
+ private getOutputText;
19
52
  private extractStructuredOutput;
20
53
  private hasPendingFunctionCalls;
21
54
  private getFunctionCalls;