klaus-agent 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,486 @@
1
+ // OpenAI Codex LLM provider
2
+ // Connects to ChatGPT's Codex backend via Responses API format.
3
+ // Requires a ChatGPT OAuth JWT token as apiKey.
4
+
5
+ import type {
6
+ LLMProvider,
7
+ LLMRequestOptions,
8
+ AssistantMessageEvent,
9
+ AssistantMessage,
10
+ AssistantContentBlock,
11
+ TokenUsage,
12
+ ThinkingLevel,
13
+ Message,
14
+ ToolDefinition,
15
+ } from "../llm/types.js";
16
+ import { platform, release, arch } from "node:os";
17
+
18
+ // --- Configuration ---
19
+
20
/** Backend root used when the provider is constructed without a `baseUrl`. */
const DEFAULT_BASE_URL = "https://chatgpt.com/backend-api";

/** Key in the decoded JWT payload whose object holds `chatgpt_account_id`. */
const JWT_CLAIM_PATH = "https://api.openai.com/auth";

/** Number of retries after the first attempt (the request loop runs attempts 0..MAX_RETRIES). */
const MAX_RETRIES = 3;

/** Backoff base in milliseconds; the delay before retry `n` is `BASE_DELAY_MS * 2 ** n`. */
const BASE_DELAY_MS = 1_000;
24
+
25
+ // --- Types ---
26
+
27
/**
 * JSON body POSTed to the Codex responses endpoint.
 *
 * `buildRequestBody` in this file fills every required field with a fixed value
 * except `model`, `instructions` and `input`; `tools` and `reasoning` are added
 * only when applicable. `temperature` and `prompt_cache_key` are declared here
 * but are not set anywhere in this file.
 */
interface CodexRequestBody {
  /** Model identifier, passed through from the request options. */
  model: string;
  /** System prompt. */
  instructions: string;
  /** Conversation history in Responses API item form. */
  input: ResponseInput;

  store: boolean;
  stream: boolean;
  tool_choice: "auto";
  parallel_tool_calls: boolean;
  text: { verbosity: string };
  include: string[];

  /** Present only when at least one tool is offered. */
  tools?: ResponseTool[];
  /** Present only when a reasoning effort was resolved for the model. */
  reasoning?: { effort: string; summary: string };
  temperature?: number;
  prompt_cache_key?: string;

  /** Escape hatch for additional, untyped request fields. */
  [key: string]: unknown;
}
43
+
44
/** One piece of message content: text sent to or produced by the model, or an image reference. */
type ResponseContent =
  | { type: "input_text"; text: string }
  | { type: "output_text"; text: string }
  | { type: "input_image"; image_url: string };

/** A user or assistant message made of content parts. */
type ResponseMessageItem = {
  type: "message";
  role: "user" | "assistant";
  content: ResponseContent[];
};

/** A tool invocation previously issued by the assistant; `arguments` is a JSON string. */
type ResponseFunctionCallItem = {
  type: "function_call";
  id: string;
  call_id: string;
  name: string;
  arguments: string;
};

/** The result of a tool invocation, linked to its call through `call_id`. */
type ResponseFunctionCallOutputItem = {
  type: "function_call_output";
  call_id: string;
  output: string;
};

/** Any single entry of the request's `input` array. */
type ResponseInputItem =
  | ResponseMessageItem
  | ResponseFunctionCallItem
  | ResponseFunctionCallOutputItem;

/** The full conversation history sent as the request's `input`. */
type ResponseInput = ResponseInputItem[];
55
+
56
/**
 * Function-tool declaration as sent in the request's `tools` array.
 * `mapTools` in this file always sends `strict` as `null`.
 */
interface ResponseTool {
  type: "function";
  /** Tool name the model uses when calling it. */
  name: string;
  /** Human-readable description shown to the model. */
  description: string;
  /** Schema object describing the tool's arguments. */
  parameters: Record<string, unknown>;
  strict: null;
}
63
+
64
+ // --- Provider ---
65
+
66
/**
 * LLM provider for ChatGPT's Codex backend.
 *
 * Each request is a single streaming POST in Responses API shape. The
 * server-sent events are translated into `AssistantMessageEvent`s and the
 * stream always ends with one `done` event carrying the assembled assistant
 * message and token usage.
 */
export class OpenAICodexProvider implements LLMProvider {
  private apiKey: string;
  private baseUrl: string;

  /**
   * @param apiKey  ChatGPT OAuth JWT. Falls back to the `OPENAI_CODEX_TOKEN`
   *                environment variable when omitted or empty.
   * @param baseUrl Backend root. Falls back to `DEFAULT_BASE_URL` when omitted or empty.
   */
  constructor(apiKey?: string, baseUrl?: string) {
    // `||` rather than `??` on purpose: an empty string must also fall through.
    this.apiKey = apiKey || process.env.OPENAI_CODEX_TOKEN || "";
    this.baseUrl = baseUrl || DEFAULT_BASE_URL;
  }

  /** Streams one model response as a sequence of assistant events. */
  async *stream(options: LLMRequestOptions): AsyncIterable<AssistantMessageEvent> {
    yield* this._streamOnce(options);
  }

  /**
   * Performs one request/response cycle.
   *
   * @throws Error when no API key is configured, the account id cannot be read
   *         from the token, the request fails (after retries where applicable),
   *         the request is aborted, or the backend reports an error event.
   */
  private async *_streamOnce(options: LLMRequestOptions): AsyncIterable<AssistantMessageEvent> {
    const { model, systemPrompt, messages, tools, thinkingLevel, signal } = options;

    if (!this.apiKey) {
      throw new Error("No API key for openai-codex provider. Set OPENAI_CODEX_TOKEN or pass apiKey.");
    }

    const accountId = extractAccountId(this.apiKey);
    const body = buildRequestBody(model, systemPrompt, messages, tools, thinkingLevel);
    const headers = buildHeaders(accountId, this.apiKey);
    const url = resolveCodexUrl(this.baseUrl);

    // Serialize once; the same payload is reused for every retry.
    const response = await this._fetchWithRetry(url, headers, JSON.stringify(body), signal);
    if (!response.body) throw new Error("No response body");

    // Parse SSE stream and map to AssistantMessageEvent
    const contentBlocks: AssistantContentBlock[] = [];
    // Keyed by the output item id, which is what argument events refer to.
    const toolCalls = new Map<string, { id: string; name: string; args: string; callId: string }>();
    let usage: TokenUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };

    for await (const event of mapCodexEvents(parseSSE(response))) {
      // Wire events are untyped JSON; individual fields are checked before use.
      const ev = event as any;
      const type = ev.type as string;

      if (type === "response.output_text.delta") {
        // Text delta: append to the trailing text block, opening one if needed.
        const delta = ev.delta as string;
        if (delta) {
          if (contentBlocks.length === 0 || contentBlocks[contentBlocks.length - 1].type !== "text") {
            contentBlocks.push({ type: "text", text: "" });
          }
          const block = contentBlocks[contentBlocks.length - 1];
          if (block.type === "text") block.text += delta;
          yield { type: "text", text: delta };
        }
      } else if (type === "response.reasoning_summary_text.delta") {
        // Reasoning summary delta: same accumulation, into a thinking block.
        const delta = ev.delta as string;
        if (delta) {
          if (contentBlocks.length === 0 || contentBlocks[contentBlocks.length - 1].type !== "thinking") {
            contentBlocks.push({ type: "thinking", thinking: "" });
          }
          const block = contentBlocks[contentBlocks.length - 1];
          if (block.type === "thinking") block.thinking += delta;
          yield { type: "thinking", thinking: delta };
        }
      } else if (type === "response.output_item.added") {
        // New output item: only function calls are tracked.
        const item = ev.item;
        if (item?.type === "function_call") {
          const id = item.id || item.call_id || `call_${toolCalls.size}`;
          const callId = item.call_id || id;
          const name = item.name || "";
          toolCalls.set(id, { id, name, args: "", callId });
          contentBlocks.push({ type: "tool_call", id: callId, name, input: {} });
          yield { type: "tool_call_start", id: callId, name };
        }
      } else if (type === "response.function_call_arguments.delta") {
        // Incremental JSON text of a tool call's arguments.
        const itemId = ev.item_id;
        const delta = ev.delta as string;
        if (itemId && delta) {
          const entry = toolCalls.get(itemId);
          if (entry) {
            entry.args += delta;
            yield { type: "tool_call_delta", id: entry.callId, input: delta };
          }
        }
      } else if (type === "response.function_call_arguments.done") {
        // Complete argument string for a call. Prefer it over the accumulated
        // deltas so arguments are not lost when none were streamed.
        const entry = ev.item_id ? toolCalls.get(ev.item_id) : undefined;
        if (entry && typeof ev.arguments === "string" && ev.arguments) {
          entry.args = ev.arguments;
        }
      } else if (type === "response.completed") {
        // Response completed: extract usage.
        const reported = ev.response?.usage;
        if (reported) {
          const inputTokens = reported.input_tokens ?? 0;
          const outputTokens = reported.output_tokens ?? 0;
          usage = {
            inputTokens,
            outputTokens,
            totalTokens: inputTokens + outputTokens,
            cacheReadTokens: reported.input_tokens_details?.cached_tokens,
          };
        }
      }
    }

    // Finalize tool call inputs. Unparseable argument text deliberately
    // degrades to an empty object instead of failing the whole response.
    for (const [, entry] of toolCalls) {
      const block = contentBlocks.find((b) => b.type === "tool_call" && b.id === entry.callId);
      if (block && block.type === "tool_call") {
        try {
          block.input = JSON.parse(entry.args || "{}");
        } catch {
          block.input = {};
        }
      }
    }

    const message: AssistantMessage = { role: "assistant", content: contentBlocks };
    yield { type: "done", message, usage };
  }

  /**
   * POSTs `payload` and returns the first successful (2xx) response.
   *
   * Retried, up to `MAX_RETRIES` times with exponential backoff: failures thrown
   * by `fetch` or while reading an error body, and HTTP errors accepted by
   * `isRetryableStatus`. Every other HTTP error is thrown immediately, and an
   * aborted request is never retried.
   */
  private async _fetchWithRetry(
    url: string,
    headers: Record<string, string>,
    payload: string,
    signal?: AbortSignal,
  ): Promise<Response> {
    for (let attempt = 0; ; attempt++) {
      if (signal?.aborted) throw new Error("Request was aborted");

      let failure: Error;
      let retryable: boolean;
      try {
        const res = await fetch(url, { method: "POST", headers, body: payload, signal });
        if (res.ok) return res;

        const errorText = await res.text();
        failure = new Error(parseErrorMessage(res.status, errorText));
        retryable = isRetryableStatus(res.status, errorText);
      } catch (err) {
        if (err instanceof Error && (err.name === "AbortError" || err.message === "Request was aborted")) {
          throw err;
        }
        // Network-level failure: always worth another attempt.
        failure = err instanceof Error ? err : new Error(String(err));
        retryable = true;
      }

      // The HTTP error is built outside the catch path on purpose: throwing it
      // inside the try would make it look like a network error and retry it.
      if (!retryable || attempt >= MAX_RETRIES) throw failure;
      await sleep(BASE_DELAY_MS * 2 ** attempt, signal);
    }
  }
}
221
+
222
+ // --- JWT ---
223
+
224
/**
 * Reads the ChatGPT account id from a Codex OAuth JWT.
 *
 * The token is only decoded, not verified: the middle segment is base64url
 * decoded, parsed as JSON, and `chatgpt_account_id` is read from the object
 * stored under `JWT_CLAIM_PATH`.
 *
 * @param token JWT in `header.payload.signature` form.
 * @returns The account id, guaranteed to be a non-empty string.
 * @throws Error with a fixed message when the token is malformed or the claim
 *         is missing, empty, or not a string.
 */
function extractAccountId(token: string): string {
  try {
    const segments = token.split(".");
    if (segments.length !== 3) throw new Error("Invalid JWT");

    // JWT uses base64url encoding; convert to standard base64 for atob
    const b64 = (segments[1] ?? "").replace(/-/g, "+").replace(/_/g, "/");
    const payload = JSON.parse(atob(b64)) as Record<string, unknown> | null;
    const claims = payload?.[JWT_CLAIM_PATH] as Record<string, unknown> | null | undefined;
    const accountId = claims?.chatgpt_account_id;

    // The value goes into an HTTP header, so anything but a real string is rejected.
    if (typeof accountId !== "string" || accountId === "") {
      throw new Error("No chatgpt_account_id in token");
    }
    return accountId;
  } catch {
    // Every failure mode is reported with the same message.
    throw new Error("Failed to extract accountId from Codex JWT token");
  }
}
238
+
239
+ // --- Request building ---
240
+
241
/**
 * Turns a configured base URL into the full Codex responses endpoint.
 *
 * Trailing slashes are dropped, then whatever part of `/codex/responses` is
 * not already present at the end is appended.
 */
function resolveCodexUrl(baseUrl: string): string {
  const trimmed = baseUrl.replace(/\/+$/, "");

  let missingSuffix = "/codex/responses";
  if (trimmed.endsWith("/codex/responses")) {
    missingSuffix = "";
  } else if (trimmed.endsWith("/codex")) {
    missingSuffix = "/responses";
  }

  return trimmed + missingSuffix;
}
247
+
248
/**
 * Builds the HTTP headers for a Codex request.
 *
 * @param accountId Value for the `chatgpt-account-id` header.
 * @param token     Bearer token for the `Authorization` header.
 * @returns Header map including a `User-Agent` that names the local OS
 *          platform, release and CPU architecture.
 */
function buildHeaders(accountId: string, token: string): Record<string, string> {
  const osDescriptor = `${platform()} ${release()}; ${arch()}`;

  const headers: Record<string, string> = {
    "Authorization": `Bearer ${token}`,
    "Content-Type": "application/json",
    "Accept": "text/event-stream",
    "chatgpt-account-id": accountId,
    "originator": "klaus",
    "OpenAI-Beta": "responses=experimental",
    "User-Agent": `klaus-agent (${osDescriptor})`,
  };
  return headers;
}
260
+
261
/**
 * Assembles the JSON body for a Codex request.
 *
 * Fixed settings: no server-side storage, streaming on, medium text verbosity,
 * encrypted reasoning content included, automatic tool choice, parallel tool
 * calls allowed. `tools` is added only for a non-empty tool list and
 * `reasoning` only when `mapReasoningEffort` resolves an effort.
 *
 * @param model         Model identifier.
 * @param systemPrompt  Sent as `instructions`.
 * @param messages      Conversation history, converted by `mapMessages`.
 * @param tools         Optional tool definitions, converted by `mapTools`.
 * @param thinkingLevel Optional requested reasoning level.
 */
function buildRequestBody(
  model: string,
  systemPrompt: string,
  messages: Message[],
  tools?: ToolDefinition[],
  thinkingLevel?: ThinkingLevel,
): CodexRequestBody {
  const hasTools = tools !== undefined && tools !== null && tools.length > 0;

  // Property order is kept stable because the object is serialized as-is.
  const request: CodexRequestBody = {
    model,
    store: false,
    stream: true,
    instructions: systemPrompt,
    input: mapMessages(messages),
    text: { verbosity: "medium" },
    include: ["reasoning.encrypted_content"],
    tool_choice: "auto",
    parallel_tool_calls: true,
  };

  if (hasTools) request.tools = mapTools(tools);

  const resolvedEffort = mapReasoningEffort(model, thinkingLevel);
  if (resolvedEffort) request.reasoning = { effort: resolvedEffort, summary: "auto" };

  return request;
}
291
+
292
/**
 * Converts the agent's message history into Responses API input items.
 *
 * - user: one `message` item; text becomes `input_text`, images become
 *   `input_image` (URL as-is, inline data as a `data:` URL). Other block types
 *   are dropped.
 * - assistant: text becomes `output_text` message items and each tool call
 *   becomes a `function_call` item, keeping their relative order. Other block
 *   types are dropped.
 * - tool_result: one `function_call_output` item; non-text blocks are
 *   JSON-serialized and all parts are joined with newlines.
 *
 * Messages with any other role are ignored.
 */
function mapMessages(messages: Message[]): ResponseInput {
  const items: ResponseInput = [];

  for (const message of messages) {
    switch (message.role) {
      case "user": {
        const parts: ResponseContent[] = [];
        if (typeof message.content === "string") {
          parts.push({ type: "input_text", text: message.content });
        } else {
          for (const block of message.content) {
            if (block.type === "text") {
              parts.push({ type: "input_text", text: block.text });
            } else if (block.type === "image") {
              const { source } = block;
              const imageUrl =
                source.type === "url" ? source.url : `data:${source.mediaType};base64,${source.data}`;
              parts.push({ type: "input_image", image_url: imageUrl });
            }
          }
        }
        items.push({ type: "message", role: "user", content: parts });
        break;
      }

      case "assistant": {
        // Text collected since the last tool call (or since the message start).
        let pendingText: ResponseContent[] = [];
        const flushText = (): void => {
          if (pendingText.length === 0) return;
          items.push({ type: "message", role: "assistant", content: pendingText });
          pendingText = [];
        };

        for (const block of message.content) {
          if (block.type === "text") {
            pendingText.push({ type: "output_text", text: block.text });
          } else if (block.type === "tool_call") {
            // Emit preceding text first so ordering matches the original turn.
            flushText();
            items.push({
              type: "function_call",
              id: block.id,
              call_id: block.id,
              name: block.name,
              arguments: JSON.stringify(block.input),
            });
          }
        }
        flushText();
        break;
      }

      case "tool_result": {
        let output: string;
        if (typeof message.content === "string") {
          output = message.content;
        } else {
          const rendered: string[] = [];
          for (const part of message.content) {
            rendered.push(part.type === "text" ? part.text : JSON.stringify(part));
          }
          output = rendered.join("\n");
        }
        items.push({ type: "function_call_output", call_id: message.toolCallId, output });
        break;
      }
    }
  }

  return items;
}
351
+
352
/**
 * Converts tool definitions into Responses API function-tool declarations.
 * The input schema is passed through as `parameters` and `strict` is always `null`.
 */
function mapTools(tools: ToolDefinition[]): ResponseTool[] {
  const declarations: ResponseTool[] = [];
  for (const { name, description, inputSchema } of tools) {
    declarations.push({
      type: "function",
      name,
      description,
      parameters: inputSchema,
      strict: null,
    });
  }
  return declarations;
}
361
+
362
/**
 * Resolves the reasoning effort to request for a model.
 *
 * Returns `undefined` when no level is given or the level is `"off"`.
 * Otherwise the level is passed through, clamped per model (matched on the
 * part of `modelId` after the last `/`):
 * - ids starting with `gpt-5.2`, `gpt-5.3` or `gpt-5.4`: `minimal` becomes `low`
 * - exactly `gpt-5.1`: `xhigh` becomes `high`
 * - exactly `gpt-5.1-codex-mini`: `high`/`xhigh` become `high`, everything else `medium`
 */
function mapReasoningEffort(modelId: string, level?: ThinkingLevel): string | undefined {
  if (!level || level === "off") return undefined;

  const requested: string = level;
  // Strip any provider prefix such as "vendor/model".
  const bareId = modelId.slice(modelId.lastIndexOf("/") + 1);

  if (bareId === "gpt-5.1-codex-mini") {
    return requested === "high" || requested === "xhigh" ? "high" : "medium";
  }
  if (bareId === "gpt-5.1") {
    return requested === "xhigh" ? "high" : requested;
  }

  const rejectsMinimal = ["gpt-5.2", "gpt-5.3", "gpt-5.4"].some((prefix) => bareId.startsWith(prefix));
  if (rejectsMinimal && requested === "minimal") return "low";

  return requested;
}
378
+
379
+ // --- SSE parsing ---
380
+
381
/**
 * Parses a server-sent-events response body into JSON objects.
 *
 * Events are separated by a blank line; LF, CRLF and bare CR line endings are
 * all accepted. For each event the `data:` lines are joined with newlines and
 * parsed as JSON. Events with no data, the `[DONE]` sentinel, and payloads that
 * are not valid JSON are skipped. A trailing event that is not terminated by a
 * blank line is discarded.
 *
 * The body reader is cancelled and released when iteration ends for any reason.
 *
 * @param response Response whose body is the event stream; yields nothing when it has no body.
 */
async function* parseSSE(response: Response): AsyncGenerator<Record<string, unknown>> {
  if (!response.body) return;

  // Hoisted so the patterns are compiled once per stream, not once per chunk.
  const eventBoundary = /\r\n\r\n|\n\n|\r\r/;
  const lineBreak = /\r\n|\n|\r/;

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });

      // Drain every complete event currently in the buffer. A boundary split
      // across two chunks simply does not match until the next chunk arrives.
      for (
        let boundary = eventBoundary.exec(buffer);
        boundary !== null;
        boundary = eventBoundary.exec(buffer)
      ) {
        const rawEvent = buffer.slice(0, boundary.index);
        buffer = buffer.slice(boundary.index + boundary[0].length);

        const data = rawEvent
          .split(lineBreak)
          .filter((line) => line.startsWith("data:"))
          .map((line) => line.slice(5).trim())
          .join("\n")
          .trim();
        if (!data || data === "[DONE]") continue;

        let parsed: unknown;
        try {
          parsed = JSON.parse(data);
        } catch {
          continue; // skip malformed JSON
        }
        // Yield outside the try so an error injected by the consumer at this
        // point is not mistaken for a JSON parse failure and swallowed.
        yield parsed as Record<string, unknown>;
      }
    }
  } finally {
    try { await reader.cancel(); } catch { /* ignore */ }
    try { reader.releaseLock(); } catch { /* ignore */ }
  }
}
420
+
421
+ // --- Event mapping ---
422
+
423
/**
 * Filters and normalizes raw Codex stream events.
 *
 * - Events without a non-empty string `type` are dropped.
 * - `error` and `response.failed` events are turned into thrown errors.
 * - `response.done`, `response.completed` and `response.incomplete` are all
 *   re-emitted as `response.completed`, after which the stream ends.
 * - Everything else is passed through untouched.
 *
 * @throws Error carrying the backend's message (or a fallback) on error events.
 */
async function* mapCodexEvents(
  events: AsyncIterable<Record<string, unknown>>,
): AsyncGenerator<Record<string, unknown>> {
  for await (const raw of events) {
    const kind = raw.type;
    if (typeof kind !== "string" || kind === "") continue;

    switch (kind) {
      case "error": {
        const { code, message } = raw as any;
        throw new Error(`Codex error: ${message || code || JSON.stringify(raw)}`);
      }

      case "response.failed": {
        const reason = (raw as any).response?.error?.message;
        throw new Error(reason || "Codex response failed");
      }

      // Normalize completion events; nothing after them is forwarded.
      case "response.done":
      case "response.completed":
      case "response.incomplete":
        yield { ...raw, type: "response.completed" };
        return;

      default:
        yield raw;
    }
  }
}
450
+
451
+ // --- Error handling ---
452
+
453
/**
 * Decides whether a failed HTTP response is worth retrying.
 *
 * @param status    HTTP status code of the response.
 * @param errorText Raw response body.
 * @returns `true` for 429/500/502/503/504, or when the body mentions a
 *          rate-limit, overload or connectivity condition.
 */
function isRetryableStatus(status: number, errorText: string): boolean {
  const transientStatuses = [429, 500, 502, 503, 504];
  if (transientStatuses.includes(status)) return true;

  const transientBody = /rate.?limit|overloaded|service.?unavailable|upstream.?connect|connection.?refused/i;
  return transientBody.test(errorText);
}
457
+
458
/**
 * Produces a human-readable message for a failed Codex HTTP response.
 *
 * When the body is JSON with an `error` object:
 * - usage-limit style codes/types yield a "ChatGPT usage limit reached" message,
 *   with the plan name and minutes until reset appended when the body has them;
 * - otherwise the error's own `message` is used, or the raw body if it has none.
 *
 * In every other case (not JSON, no `error`, unexpected field shapes) the
 * result is a generic message with the status and the first 200 characters of
 * the body.
 *
 * @param status HTTP status code.
 * @param raw    Raw response body.
 */
function parseErrorMessage(status: number, raw: string): string {
  const generic = `Codex request failed (${status}): ${raw.slice(0, 200)}`;

  try {
    const body = JSON.parse(raw) as {
      error?: { code?: string; type?: string; message?: string; plan_type?: string; resets_at?: number };
    };
    const detail = body?.error;
    if (!detail) return generic;

    const limitPattern = /usage_limit_reached|usage_not_included|rate_limit_exceeded/i;
    if (!limitPattern.test(detail.code || detail.type || "")) {
      return detail.message || raw;
    }

    const planNote = detail.plan_type ? ` (${detail.plan_type.toLowerCase()} plan)` : "";

    let retryNote = "";
    if (detail.resets_at) {
      // `resets_at` is in seconds; never report a negative wait.
      const minutesLeft = Math.max(0, Math.round((detail.resets_at * 1000 - Date.now()) / 60000));
      retryNote = ` Try again in ~${minutesLeft} min.`;
    }

    return `ChatGPT usage limit reached${planNote}.${retryNote}`.trim();
  } catch {
    // Not JSON, or a field had an unexpected shape.
    return generic;
  }
}
478
+
479
/**
 * Waits for `ms` milliseconds, or until `signal` aborts.
 *
 * @param ms     Delay in milliseconds.
 * @param signal Optional abort signal.
 * @returns A promise that resolves after the delay and rejects with
 *          `Error("Request was aborted")` if the signal is already aborted or
 *          aborts while waiting (the pending timer is cleared in that case).
 */
function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (signal?.aborted) {
      reject(new Error("Request was aborted"));
      return;
    }

    let timer: ReturnType<typeof setTimeout>;

    function handleAbort(): void {
      clearTimeout(timer);
      reject(new Error("Request was aborted"));
    }

    timer = setTimeout(() => {
      // Detach so a later abort cannot reach an already-settled promise.
      signal?.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);

    signal?.addEventListener("abort", handleAbort);
  });
}
@@ -9,6 +9,7 @@ export const RETRYABLE_PATTERNS: Record<string, string[]> = {
9
9
  anthropic: [...COMMON_RETRYABLE, "rate_limit", "overloaded", "529"],
10
10
  openai: [...COMMON_RETRYABLE, "rate_limit"],
11
11
  google: [...COMMON_RETRYABLE],
12
+ codex: [...COMMON_RETRYABLE, "rate_limit", "usage_limit", "overloaded"],
12
13
  };
13
14
 
14
15
  export function isRetryableError(error: Error, patterns: string[]): boolean {