@bitkyc08/opencodex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.ko.md +164 -0
  3. package/README.md +165 -0
  4. package/README.zh-CN.md +162 -0
  5. package/gui/README.md +73 -0
  6. package/gui/dist/assets/index-C1wlp1SM.css +1 -0
  7. package/gui/dist/assets/index-C9y3iMF1.js +9 -0
  8. package/gui/dist/favicon.png +0 -0
  9. package/gui/dist/icons.svg +24 -0
  10. package/gui/dist/index.html +15 -0
  11. package/gui/dist/logo.png +0 -0
  12. package/package.json +56 -0
  13. package/scripts/postinstall.mjs +57 -0
  14. package/src/adapters/anthropic.ts +306 -0
  15. package/src/adapters/azure.ts +31 -0
  16. package/src/adapters/base.ts +20 -0
  17. package/src/adapters/google.ts +195 -0
  18. package/src/adapters/image.ts +23 -0
  19. package/src/adapters/openai-chat.ts +265 -0
  20. package/src/adapters/openai-responses.ts +43 -0
  21. package/src/bridge.ts +296 -0
  22. package/src/cli.ts +183 -0
  23. package/src/codex-catalog.ts +318 -0
  24. package/src/codex-inject.ts +186 -0
  25. package/src/config.ts +108 -0
  26. package/src/index.ts +20 -0
  27. package/src/init.ts +163 -0
  28. package/src/model-cache.ts +42 -0
  29. package/src/oauth/anthropic.ts +151 -0
  30. package/src/oauth/callback-server.ts +249 -0
  31. package/src/oauth/index.ts +235 -0
  32. package/src/oauth/key-providers.ts +126 -0
  33. package/src/oauth/kimi.ts +160 -0
  34. package/src/oauth/local-token-detect.ts +71 -0
  35. package/src/oauth/login-cli.ts +90 -0
  36. package/src/oauth/pkce.ts +15 -0
  37. package/src/oauth/store.ts +39 -0
  38. package/src/oauth/types.ts +22 -0
  39. package/src/oauth/xai.ts +234 -0
  40. package/src/responses/parser.ts +402 -0
  41. package/src/responses/schema.ts +145 -0
  42. package/src/router.ts +86 -0
  43. package/src/server.ts +522 -0
  44. package/src/service.ts +130 -0
  45. package/src/star-prompt.ts +50 -0
  46. package/src/types.ts +228 -0
  47. package/src/update.ts +64 -0
  48. package/src/vision/describe.ts +98 -0
  49. package/src/vision/index.ts +141 -0
  50. package/src/web-search/executor.ts +75 -0
  51. package/src/web-search/format-result.ts +45 -0
  52. package/src/web-search/index.ts +62 -0
  53. package/src/web-search/loop.ts +188 -0
  54. package/src/web-search/parse.ts +128 -0
  55. package/src/web-search/synthetic-tool.ts +42 -0
@@ -0,0 +1,15 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/png" href="/favicon.png" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <meta name="color-scheme" content="dark" />
8
+ <title>opencodex · proxy dashboard</title>
9
+ <script type="module" crossorigin src="/assets/index-C9y3iMF1.js"></script>
10
+ <link rel="stylesheet" crossorigin href="/assets/index-C1wlp1SM.css">
11
+ </head>
12
+ <body>
13
+ <div id="root"></div>
14
+ </body>
15
+ </html>
Binary file
package/package.json ADDED
@@ -0,0 +1,56 @@
1
+ {
2
+ "name": "@bitkyc08/opencodex",
3
+ "version": "0.1.0",
4
+ "description": "Universal provider proxy for OpenAI Codex — use any LLM with Codex CLI/App/SDK",
5
+ "type": "module",
6
+ "main": "src/index.ts",
7
+ "bin": {
8
+ "opencodex": "./src/cli.ts",
9
+ "ocx": "./src/cli.ts"
10
+ },
11
+ "files": [
12
+ "src",
13
+ "scripts/postinstall.mjs",
14
+ "gui/dist",
15
+ "README.md",
16
+ "LICENSE"
17
+ ],
18
+ "engines": {
19
+ "bun": ">=1.1.0"
20
+ },
21
+ "scripts": {
22
+ "dev": "bun run src/cli.ts start",
23
+ "start": "bun run src/cli.ts start",
24
+ "typecheck": "bun x tsc --noEmit",
25
+ "build:gui": "cd gui && bun install && bun run build",
26
+ "postinstall": "node scripts/postinstall.mjs",
27
+ "prepublishOnly": "bun run typecheck && bun run build:gui",
28
+ "release": "bun scripts/release.ts",
29
+ "release:watch": "bun scripts/release.ts watch"
30
+ },
31
+ "dependencies": {
32
+ "zod": "^4.0.0"
33
+ },
34
+ "devDependencies": {
35
+ "@types/bun": "latest",
36
+ "typescript": "^5.8.0"
37
+ },
38
+ "keywords": [
39
+ "codex",
40
+ "openai",
41
+ "proxy",
42
+ "llm",
43
+ "ollama",
44
+ "anthropic",
45
+ "responses-api"
46
+ ],
47
+ "repository": {
48
+ "type": "git",
49
+ "url": "git+https://github.com/lidge-jun/opencodex.git"
50
+ },
51
+ "homepage": "https://lidge-jun.github.io/opencodex/",
52
+ "bugs": {
53
+ "url": "https://github.com/lidge-jun/opencodex/issues"
54
+ },
55
+ "license": "MIT"
56
+ }
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * postinstall — one-time GitHub-star prompt during `npm install -g @bitkyc08/opencodex`.
4
+ *
5
+ * Behavior:
6
+ * - TTY-only (skips CI / piped installs)
7
+ * - Requires `gh` CLI with auth (stars directly via `gh api`)
8
+ * - Prompts once; shares the ~/.opencodex/.star-prompted marker with the first-`ocx start` prompt
9
+ * (so bun users — where postinstall may not run on `-g` — still get it exactly once on start)
10
+ * - Never blocks the install (all errors silently caught)
11
+ */
12
+ import { spawnSync } from "node:child_process";
13
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
14
+ import { join } from "node:path";
15
+ import { createInterface } from "node:readline/promises";
16
+ import { homedir } from "node:os";
17
+
18
// GitHub `owner/name` slug; interpolated into the `/user/starred/...` API path by starRepo().
const REPO = "lidge-jun/opencodex";
// Marker file under ~/.opencodex; its existence means the star prompt has already been shown.
const MARKER = join(homedir(), ".opencodex", ".star-prompted");
20
+
21
/**
 * Probe for the GitHub CLI by running `gh --version`.
 *
 * @returns {boolean} true only when the process could be spawned and exited with status 0
 *   (a spawn failure or the 3-second timeout both count as "not installed").
 */
function ghInstalled() {
  const probe = spawnSync("gh", ["--version"], { stdio: "ignore", timeout: 3000, windowsHide: true });
  if (probe.error) return false;
  return probe.status === 0;
}
25
+
26
/**
 * Star the repository through `gh api -X PUT /user/starred/<REPO>`.
 *
 * @returns {{ ok: true } | { ok: false, error: string }} success flag; on failure `error` is the
 *   spawn error message, the trimmed stderr/stdout of `gh`, or a generic "gh exited N" fallback.
 */
function starRepo() {
  const ghArgs = ["api", "-X", "PUT", `/user/starred/${REPO}`];
  const spawnOpts = { encoding: "utf8", stdio: ["ignore", "pipe", "pipe"], timeout: 10000, windowsHide: true };
  const { error, status, stderr, stdout } = spawnSync("gh", ghArgs, spawnOpts);

  if (error) return { ok: false, error: error.message };
  if (status === 0) return { ok: true };

  // Prefer whatever gh printed; fall back to the bare exit status when it printed nothing.
  const detail = (stderr || stdout || "").trim();
  return { ok: false, error: detail || `gh exited ${status}` };
}
33
+
34
/**
 * Ask a yes/no question on the terminal.
 *
 * @param {string} question Prompt text written to stdout.
 * @returns {Promise<boolean>} true for an empty answer, "y" or "yes" (case-insensitive,
 *   surrounding whitespace ignored); false for anything else.
 */
async function askYesNo(question) {
  const prompt = createInterface({ input: process.stdin, output: process.stdout });
  try {
    const reply = await prompt.question(question);
    const normalized = reply.trim().toLowerCase();
    return ["", "y", "yes"].includes(normalized);
  } finally {
    // Always release stdin, even if the question is aborted.
    prompt.close();
  }
}
43
+
44
/**
 * Postinstall entry point: show the star prompt at most once, and only in an interactive
 * terminal where the `gh` CLI is available.
 */
async function main() {
  const interactive = process.stdin.isTTY && process.stdout.isTTY;
  if (!interactive || existsSync(MARKER) || !ghInstalled()) return;

  // Write the marker BEFORE prompting so the question is never repeated, whatever the answer.
  try {
    mkdirSync(join(homedir(), ".opencodex"), { recursive: true });
    writeFileSync(MARKER, new Date().toISOString());
  } catch {
    /* best-effort */
  }

  const accepted = await askYesNo("[opencodex] Enjoying opencodex? Star it on GitHub? [Y/n] ");
  if (accepted) {
    const outcome = starRepo();
    const message = outcome.ok
      ? "[opencodex] Thanks for the star! ⭐"
      : `[opencodex] Couldn't star automatically: ${outcome.error}`;
    console.log(message);
  }
}

main().catch(() => { /* never fail the install */ });
@@ -0,0 +1,306 @@
1
+ import type { ProviderAdapter } from "./base";
2
+ import type {
3
+ AdapterEvent,
4
+ OcxAssistantMessage,
5
+ OcxContentPart,
6
+ OcxMessage,
7
+ OcxParsedRequest,
8
+ OcxProviderConfig,
9
+ OcxTextContent,
10
+ OcxThinkingContent,
11
+ OcxToolCall,
12
+ } from "../types";
13
+ import { ANTHROPIC_OAUTH_BETA, CLAUDE_CODE_SYSTEM_INSTRUCTION, applyClaudeToolPrefix, stripClaudeToolPrefix } from "../oauth/anthropic";
14
+ import { parseDataUrl } from "./image";
15
+
16
/**
 * Convert one user content part into an Anthropic content block.
 *
 * Text parts become `{ type: "text" }` blocks. Image parts become `{ type: "image" }` blocks whose
 * source is inline base64 when `parseDataUrl` recognises the URL, and a plain URL source otherwise.
 */
function toAnthropicContentPart(p: OcxContentPart): unknown {
  if (p.type !== "image") {
    return { type: "text", text: p.text };
  }
  const inline = parseDataUrl(p.imageUrl);
  const source = inline
    ? { type: "base64", media_type: inline.mediaType, data: inline.base64 }
    : { type: "url", url: p.imageUrl };
  return { type: "image", source };
}
26
+
27
/** Default `max_tokens` when Codex omits `max_output_tokens` (i.e. `options.maxOutputTokens` is undefined). */
const DEFAULT_MAX_TOKENS = 8192;
/**
 * Safe ceiling for `max_tokens` (thinking + visible output) across current Claude 4.x models.
 * Only applied when a reasoning effort is requested.
 */
const REASONING_MAX_TOKENS_CEILING = 32_000;
/** Anthropic's documented minimum `thinking.budget_tokens`; the computed budget is never lower. */
const MIN_THINKING_BUDGET = 1024;
/** Visible-output room added above the thinking budget when sizing `max_tokens`. */
const OUTPUT_HEADROOM = 8192;
/** Minimum visible-output room kept below `max_tokens` (so `max_tokens > budget_tokens` always holds). */
const OUTPUT_FLOOR = 4096;
37
+
38
/**
 * Translate a Responses-API reasoning effort into an Anthropic extended-thinking budget.
 *
 * @param effort Effort label ("minimal" | "low" | "medium" | "high" | "xhigh" | "max").
 * @returns Thinking budget in tokens (always >= 1024); unrecognised labels get the "medium" budget.
 */
function reasoningBudget(effort: string): number {
  const MEDIUM_BUDGET = 8192;
  // A Map (not an object literal) so inherited keys such as "toString" can never match.
  const budgets = new Map<string, number>([
    ["minimal", 1024],
    ["low", 4096],
    ["medium", MEDIUM_BUDGET],
    ["high", 16384],
    ["xhigh", 24576],
    ["max", 32000],
  ]);
  const found = budgets.get(effort);
  return found === undefined ? MEDIUM_BUDGET : found;
}
50
+
51
/**
 * Convert the parsed conversation into Anthropic Messages API shape.
 *
 * - `user` and `developer` messages both become `role: "user"`.
 * - `assistant` messages become text / thinking / tool_use blocks (tool names get the Claude
 *   tool prefix when `isOAuth` is true).
 * - `toolResult` messages become a `role: "user"` message holding one `tool_result` block.
 * - Messages with any other role are skipped.
 *
 * @returns `system` is the system-prompt entries joined by blank lines (undefined when absent or
 *   empty); `messages` is the converted message list in original order.
 */
function messagesToAnthropicFormat(parsed: OcxParsedRequest, isOAuth: boolean): { system: string | undefined; messages: unknown[] } {
  const joinedPrompt = parsed.context.systemPrompt?.join("\n\n");
  const system = joinedPrompt ? joinedPrompt : undefined;

  // Strings pass through untouched; part arrays are mapped block by block (images stay native).
  const renderContent = (content: unknown): unknown =>
    typeof content === "string" ? content : (content as OcxContentPart[]).map(toAnthropicContentPart);

  const messages: unknown[] = [];
  for (const msg of parsed.context.messages) {
    if (msg.role === "user" || msg.role === "developer") {
      messages.push({ role: "user", content: renderContent(msg.content) });
    } else if (msg.role === "assistant") {
      const blocks: unknown[] = [];
      for (const part of (msg as OcxAssistantMessage).content) {
        if (part.type === "text") {
          blocks.push({ type: "text", text: (part as OcxTextContent).text });
        } else if (part.type === "thinking") {
          const thought = part as OcxThinkingContent;
          const block: Record<string, unknown> = { type: "thinking", thinking: thought.thinking };
          // The signature is only attached when one was recorded.
          if (thought.signature) block.signature = thought.signature;
          blocks.push(block);
        } else if (part.type === "toolCall") {
          const call = part as OcxToolCall;
          const toolName = isOAuth ? applyClaudeToolPrefix(call.name) : call.name;
          blocks.push({ type: "tool_use", id: call.id, name: toolName, input: call.arguments });
        }
      }
      messages.push({ role: "assistant", content: blocks });
    } else if (msg.role === "toolResult") {
      // Anthropic tool_result accepts a string OR content blocks, so images (e.g. Codex
      // view_image output) are rendered natively instead of being dropped.
      const resultBlock = {
        type: "tool_result",
        tool_use_id: msg.toolCallId,
        content: renderContent(msg.content),
      };
      messages.push({ role: "user", content: [resultBlock] });
    }
  }

  return { system, messages };
}
103
+
104
/**
 * Convert the request's tool definitions into Anthropic `tools` entries.
 *
 * @returns undefined when no tools are declared; otherwise one `{ name, description, input_schema }`
 *   entry per tool, with the Claude tool prefix applied to names when `isOAuth` is true.
 */
function toolsToAnthropicFormat(parsed: OcxParsedRequest, isOAuth: boolean): unknown[] | undefined {
  const declared = parsed.context.tools;
  if (!declared || declared.length === 0) return undefined;

  const converted: unknown[] = [];
  for (const tool of declared) {
    converted.push({
      name: isOAuth ? applyClaudeToolPrefix(tool.name) : tool.name,
      description: tool.description,
      input_schema: tool.parameters,
    });
  }
  return converted;
}
112
+
113
/**
 * Create the adapter that speaks the Anthropic Messages API (`POST {baseUrl}/v1/messages`).
 *
 * When `provider.authMode === "oauth"` the adapter sends a Bearer token plus the OAuth beta header,
 * prepends the Claude Code identity system block, and applies/strips the Claude tool-name prefix.
 * Otherwise it authenticates with `x-api-key`.
 *
 * @param provider Provider configuration (base URL, credentials, extra headers, auth mode).
 * @returns An adapter exposing `buildRequest`, `parseStream` (SSE) and `parseResponse` (JSON).
 */
export function createAnthropicAdapter(provider: OcxProviderConfig): ProviderAdapter {
  const isOAuth = provider.authMode === "oauth";
  return {
    name: "anthropic",

    /** Build the HTTP request (URL, headers, JSON body) for one parsed Responses request. */
    buildRequest(parsed: OcxParsedRequest) {
      const { system, messages } = messagesToAnthropicFormat(parsed, isOAuth);
      const tools = toolsToAnthropicFormat(parsed, isOAuth);

      const body: Record<string, unknown> = {
        model: parsed.modelId,
        messages,
        stream: parsed.stream,
        max_tokens: parsed.options.maxOutputTokens ?? DEFAULT_MAX_TOKENS,
      };
      if (isOAuth) {
        // Claude OAuth (Pro/Max) requires the first system block to be the Claude Code identity.
        body.system = [
          { type: "text", text: CLAUDE_CODE_SYSTEM_INSTRUCTION },
          ...(system ? [{ type: "text", text: system }] : []),
        ];
      } else if (system) {
        body.system = system;
      }
      if (tools) body.tools = tools;
      if (parsed.options.temperature !== undefined) body.temperature = parsed.options.temperature;
      if (parsed.options.topP !== undefined) body.top_p = parsed.options.topP;
      if (parsed.options.stopSequences) body.stop_sequences = parsed.options.stopSequences;

      if (parsed.options.reasoning) {
        // Anthropic requires max_tokens > thinking.budget_tokens (max_tokens caps thinking +
        // visible output) and budget_tokens >= 1024. Codex sends the SAME value for both, which
        // 400s ("max_tokens must be greater than thinking.budget_tokens"). Size them so max_tokens
        // always exceeds the budget within a model-safe ceiling, reserving room for visible output.
        const maxOut = parsed.options.maxOutputTokens ?? DEFAULT_MAX_TOKENS;
        const wantBudget = reasoningBudget(parsed.options.reasoning);
        const maxTokens = Math.min(REASONING_MAX_TOKENS_CEILING, Math.max(maxOut, wantBudget + OUTPUT_HEADROOM));
        const budget = Math.max(MIN_THINKING_BUDGET, Math.min(wantBudget, maxTokens - OUTPUT_FLOOR));
        body.max_tokens = maxTokens;
        body.thinking = { type: "enabled", budget_tokens: budget };
        // Extended thinking disallows temperature != 1 and top_p — drop both or the API 400s.
        delete body.temperature;
        delete body.top_p;
      }

      if (parsed.options.toolChoice) {
        // NOTE(review): Anthropic's extended-thinking docs appear to restrict tool_choice to
        // auto/none while thinking is enabled — confirm whether "required"/named choices need
        // downgrading when `options.reasoning` is set.
        const tc = parsed.options.toolChoice;
        if (tc === "auto") body.tool_choice = { type: "auto" };
        else if (tc === "none") body.tool_choice = { type: "none" };
        else if (tc === "required") body.tool_choice = { type: "any" };
        else if (typeof tc === "object" && "name" in tc) body.tool_choice = { type: "tool", name: isOAuth ? applyClaudeToolPrefix(tc.name) : tc.name };
      }

      const url = `${provider.baseUrl}/v1/messages`;
      const headers: Record<string, string> = {
        "Content-Type": "application/json",
        "anthropic-version": "2023-06-01",
      };
      if (isOAuth) {
        if (provider.apiKey) headers["Authorization"] = `Bearer ${provider.apiKey}`;
        headers["anthropic-beta"] = ANTHROPIC_OAUTH_BETA;
      } else if (provider.apiKey) {
        headers["x-api-key"] = provider.apiKey;
      }
      // Provider-configured headers are applied last, so they override the defaults above.
      if (provider.headers) Object.assign(headers, provider.headers);

      return { url, method: "POST", headers, body: JSON.stringify(body) };
    },

    /**
     * Parse an Anthropic SSE stream into adapter events.
     *
     * Yields `text_delta`, `thinking_delta`, `tool_call_start` / `tool_call_delta` /
     * `tool_call_end`, a `done` event (when a `message_delta` carries usage), or a terminal
     * `error` event. Lines that are not valid JSON `data:` payloads are skipped.
     */
    async *parseStream(response: Response): AsyncGenerator<AdapterEvent> {
      if (!response.body) {
        yield { type: "error", message: "No response body" };
        return;
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = "";
      let currentBlockType = "";
      let currentToolCallId = "";
      let currentToolCallName = "";
      // Kept across reads: an `event:` line and its `data:` line may arrive in different chunks.
      let currentEventType = "";
      // Anthropic reports input tokens in `message_start`; `message_delta` usage may carry only
      // `output_tokens`, so remember the earlier value for the final usage report.
      let inputTokens = 0;

      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          buffer += decoder.decode(value, { stream: true });

          // Keep the trailing partial line in the buffer until its newline arrives.
          const lines = buffer.split("\n");
          buffer = lines.pop() ?? "";

          for (const line of lines) {
            if (line.startsWith("event: ")) {
              currentEventType = line.slice(7).trim();
              continue;
            }
            if (!line.startsWith("data: ")) continue;
            const payload = line.slice(6).trim();
            if (!payload) continue;

            let data: Record<string, unknown>;
            try {
              data = JSON.parse(payload) as Record<string, unknown>;
            } catch {
              continue;
            }

            // Prefer the SSE event name; fall back to the payload's own `type` field.
            switch (currentEventType || data.type) {
              case "message_start": {
                const message = data.message as { usage?: Record<string, number> } | undefined;
                inputTokens = message?.usage?.input_tokens ?? inputTokens;
                break;
              }
              case "content_block_start": {
                const block = data.content_block as { type: string; id?: string; name?: string } | undefined;
                if (!block) break;
                currentBlockType = block.type;
                if (block.type === "tool_use") {
                  currentToolCallId = block.id ?? "";
                  currentToolCallName = isOAuth ? stripClaudeToolPrefix(block.name ?? "") : (block.name ?? "");
                  yield { type: "tool_call_start", id: currentToolCallId, name: currentToolCallName };
                }
                break;
              }
              case "content_block_delta": {
                const delta = data.delta as Record<string, unknown> | undefined;
                if (!delta) break;
                if (delta.type === "text_delta" && typeof delta.text === "string") {
                  yield { type: "text_delta", text: delta.text };
                } else if (delta.type === "thinking_delta" && typeof delta.thinking === "string") {
                  yield { type: "thinking_delta", thinking: delta.thinking };
                } else if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") {
                  yield { type: "tool_call_delta", arguments: delta.partial_json };
                }
                break;
              }
              case "content_block_stop": {
                // Only tool_use blocks need an explicit end marker.
                if (currentBlockType === "tool_use") {
                  yield { type: "tool_call_end" };
                  currentToolCallId = "";
                  currentBlockType = "";
                }
                break;
              }
              case "message_delta": {
                const usage = data.usage as Record<string, number> | undefined;
                if (usage) {
                  yield {
                    type: "done",
                    usage: {
                      inputTokens: usage.input_tokens ?? inputTokens,
                      outputTokens: usage.output_tokens ?? 0,
                    },
                  };
                }
                break;
              }
              case "message_stop": {
                break;
              }
              case "error": {
                const err = data.error as { message?: string } | undefined;
                yield { type: "error", message: err?.message ?? "Anthropic error" };
                return;
              }
            }
            currentEventType = "";
          }
        }
      } finally {
        reader.releaseLock();
      }
    },

    /**
     * Parse a non-streaming JSON response into the equivalent event list: one `text_delta` per
     * non-empty text block, a start/delta/end triple per `tool_use` block, then a single `done`.
     */
    async parseResponse(response: Response): Promise<AdapterEvent[]> {
      const json = await response.json() as Record<string, unknown>;
      const events: AdapterEvent[] = [];
      const content = json.content as { type: string; text?: string; id?: string; name?: string; input?: unknown }[] | undefined;
      if (content) {
        for (const block of content) {
          if (block.type === "text" && block.text) {
            events.push({ type: "text_delta", text: block.text });
          } else if (block.type === "tool_use") {
            events.push({ type: "tool_call_start", id: block.id ?? "", name: isOAuth ? stripClaudeToolPrefix(block.name ?? "") : (block.name ?? "") });
            events.push({ type: "tool_call_delta", arguments: JSON.stringify(block.input ?? {}) });
            events.push({ type: "tool_call_end" });
          }
        }
      }
      const usage = json.usage as Record<string, number> | undefined;
      events.push({
        type: "done",
        usage: usage ? { inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0 } : undefined,
      });
      return events;
    },
  };
}
@@ -0,0 +1,31 @@
1
+ import type { ProviderAdapter } from "./base";
2
+ import type { OcxParsedRequest, OcxProviderConfig } from "../types";
3
+ import { createResponsesPassthroughAdapter } from "./openai-responses";
4
+
5
/**
 * Create the Azure OpenAI adapter: the Responses passthrough adapter with Azure-style
 * authentication (`api-key` header instead of `Authorization`) and an `api-version` query
 * parameter appended to the request URL.
 *
 * The API version is read from `provider.headers["api-version"]` and defaults to
 * "2025-04-01-preview".
 *
 * @param provider Provider configuration (base URL, API key, extra headers).
 */
export function createAzureAdapter(provider: OcxProviderConfig): ProviderAdapter & { passthrough: true } {
  const inner = createResponsesPassthroughAdapter({
    ...provider,
    baseUrl: provider.baseUrl,
  });

  return {
    ...inner,
    name: "azure-openai",

    // NOTE(review): the optional `incoming` metadata is not forwarded to the inner adapter —
    // presumably deliberate so caller credentials are never relayed to Azure; confirm.
    buildRequest(parsed: OcxParsedRequest) {
      const request = inner.buildRequest(parsed);
      const headers = { ...request.headers };
      if (provider.apiKey) {
        // Azure authenticates with `api-key`; drop any Bearer header set by the inner adapter.
        headers["api-key"] = provider.apiKey;
        delete headers["Authorization"];
      }
      const apiVersion = (provider.headers?.["api-version"]) ?? "2025-04-01-preview";
      const separator = request.url.includes("?") ? "&" : "?";
      return {
        ...request,
        // The version comes from user configuration, so encode it before it enters the query string.
        url: `${request.url}${separator}api-version=${encodeURIComponent(apiVersion)}`,
        headers,
      };
    },
  };
}
@@ -0,0 +1,20 @@
1
+ import type { AdapterEvent, OcxParsedRequest } from "../types";
2
+
3
/**
 * Metadata about the caller's incoming request, for auth-forwarding adapters.
 * Passed as the optional second argument of `ProviderAdapter.buildRequest`.
 */
export interface IncomingMeta {
  /** Headers of the caller's incoming request. */
  headers: Headers;
}
7
+
8
/**
 * Contract implemented by every upstream provider adapter: build the outgoing HTTP request
 * from a parsed request and turn the provider's response back into `AdapterEvent`s.
 */
export interface ProviderAdapter {
  /** Adapter identifier (e.g. "anthropic", "google", "azure-openai"). */
  name: string;

  /**
   * Build the upstream HTTP request for a parsed request.
   *
   * @param parsed The parsed request to translate.
   * @param incoming Optional metadata about the caller's request; adapters may ignore it.
   * @returns The target URL, HTTP method, headers, and the request body as a string.
   */
  buildRequest(parsed: OcxParsedRequest, incoming?: IncomingMeta): {
    url: string;
    method: string;
    headers: Record<string, string>;
    body: string;
  };

  /** Incrementally parse a provider response into adapter events. */
  parseStream(response: Response): AsyncGenerator<AdapterEvent>;
  /** Optional: parse a whole response at once into the equivalent list of adapter events. */
  parseResponse?(response: Response): Promise<AdapterEvent[]>;
}
@@ -0,0 +1,195 @@
1
+ import type { ProviderAdapter } from "./base";
2
+ import type {
3
+ AdapterEvent,
4
+ OcxAssistantMessage,
5
+ OcxContentPart,
6
+ OcxParsedRequest,
7
+ OcxProviderConfig,
8
+ OcxTextContent,
9
+ OcxToolCall,
10
+ } from "../types";
11
+ import { contentPartsToText, parseDataUrl } from "./image";
12
+
13
/**
 * Convert the parsed conversation into Gemini `contents` plus an optional `systemInstruction`.
 *
 * - `user` and `developer` messages become `role: "user"` entries.
 * - `assistant` messages become `role: "model"` entries with text and `functionCall` parts.
 * - `toolResult` messages become `role: "user"` entries holding a `functionResponse` part whose
 *   result is the tool output flattened to text.
 * - Messages with any other role are skipped.
 */
function messagesToGeminiFormat(parsed: OcxParsedRequest): { systemInstruction?: unknown; contents: unknown[] } {
  const promptLines = parsed.context.systemPrompt;
  const systemInstruction = promptLines?.length
    ? { parts: [{ text: promptLines.join("\n\n") }] }
    : undefined;

  const toUserPart = (p: OcxContentPart): unknown => {
    if (p.type !== "image") return { text: p.text };
    const inline = parseDataUrl(p.imageUrl);
    if (inline) return { inline_data: { mime_type: inline.mediaType, data: inline.base64 } };
    // Gemini takes base64 via inline_data; a remote URL needs a mime type we don't have, so
    // fall back to a short marker rather than inlining the URL as a huge text blob.
    return { text: `[image: ${p.imageUrl}]` };
  };

  const contents: unknown[] = [];
  for (const msg of parsed.context.messages) {
    if (msg.role === "user" || msg.role === "developer") {
      const parts = typeof msg.content === "string"
        ? [{ text: msg.content }]
        : (msg.content as OcxContentPart[]).map(p => toUserPart(p));
      contents.push({ role: "user", parts });
    } else if (msg.role === "assistant") {
      const modelParts: unknown[] = [];
      for (const piece of (msg as OcxAssistantMessage).content) {
        if (piece.type === "text") {
          modelParts.push({ text: (piece as OcxTextContent).text });
        } else if (piece.type === "toolCall") {
          const call = piece as OcxToolCall;
          modelParts.push({ functionCall: { name: call.name, args: call.arguments } });
        }
      }
      contents.push({ role: "model", parts: modelParts });
    } else if (msg.role === "toolResult") {
      const result = contentPartsToText(msg.content);
      contents.push({
        role: "user",
        parts: [{ functionResponse: { name: msg.toolName, response: { result } } }],
      });
    }
  }

  return { systemInstruction, contents };
}
65
+
66
/**
 * Convert the request's tool definitions into Gemini's `tools` array.
 *
 * @returns undefined when no tools are declared; otherwise a single-element array whose
 *   `functionDeclarations` lists every tool's name, description and parameter schema.
 */
function toolsToGeminiFormat(parsed: OcxParsedRequest): unknown[] | undefined {
  const declared = parsed.context.tools;
  if (!declared?.length) return undefined;

  const functionDeclarations: unknown[] = [];
  for (const tool of declared) {
    functionDeclarations.push({
      name: tool.name,
      description: tool.description,
      parameters: tool.parameters,
    });
  }
  return [{ functionDeclarations }];
}
76
+
77
/**
 * Create the adapter for the Google Gemini API
 * (`POST {baseUrl}/v1beta/models/{model}:generateContent` or `:streamGenerateContent?alt=sse`).
 *
 * @param provider Provider configuration (base URL, API key, extra headers).
 * @returns An adapter exposing `buildRequest`, `parseStream` (SSE) and `parseResponse` (JSON).
 */
export function createGoogleAdapter(provider: OcxProviderConfig): ProviderAdapter {
  return {
    name: "google",

    /** Build the HTTP request (URL, headers, JSON body) for one parsed request. */
    buildRequest(parsed: OcxParsedRequest) {
      const { systemInstruction, contents } = messagesToGeminiFormat(parsed);
      const tools = toolsToGeminiFormat(parsed);

      const body: Record<string, unknown> = { contents };
      if (systemInstruction) body.systemInstruction = systemInstruction;
      if (tools) body.tools = tools;

      // generationConfig is only sent when at least one sampling option is set.
      const generationConfig: Record<string, unknown> = {};
      if (parsed.options.maxOutputTokens) generationConfig.maxOutputTokens = parsed.options.maxOutputTokens;
      if (parsed.options.temperature !== undefined) generationConfig.temperature = parsed.options.temperature;
      if (parsed.options.topP !== undefined) generationConfig.topP = parsed.options.topP;
      if (parsed.options.stopSequences) generationConfig.stopSequences = parsed.options.stopSequences;
      if (Object.keys(generationConfig).length > 0) body.generationConfig = generationConfig;

      const method = parsed.stream ? "streamGenerateContent" : "generateContent";
      const streamParam = parsed.stream ? "?alt=sse" : "";
      const url = `${provider.baseUrl}/v1beta/models/${parsed.modelId}:${method}${streamParam}`;

      const headers: Record<string, string> = { "Content-Type": "application/json" };
      if (provider.apiKey) headers["x-goog-api-key"] = provider.apiKey;
      // Provider-configured headers are applied last, so they override the defaults above.
      if (provider.headers) Object.assign(headers, provider.headers);

      return { url, method: "POST", headers, body: JSON.stringify(body) };
    },

    /**
     * Parse a Gemini SSE stream into adapter events.
     *
     * Yields a `text_delta` per text part and a start/delta/end triple per `functionCall` part
     * (Gemini supplies no call id, so a random `call_xxxxxxxx` id is generated). Exactly one
     * `done` event is emitted when the stream ends, carrying the most recent `usageMetadata`
     * seen, if any. `data:` lines that are not valid JSON are skipped.
     */
    async *parseStream(response: Response): AsyncGenerator<AdapterEvent> {
      if (!response.body) {
        yield { type: "error", message: "No response body" };
        return;
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = "";
      // Latest usage reported by any chunk; attached to the single terminal `done` event.
      let latestUsage: { inputTokens: number; outputTokens: number } | undefined;

      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          buffer += decoder.decode(value, { stream: true });

          // Keep the trailing partial line in the buffer until its newline arrives.
          const lines = buffer.split("\n");
          buffer = lines.pop() ?? "";

          for (const line of lines) {
            if (!line.startsWith("data: ")) continue;
            const payload = line.slice(6).trim();
            if (!payload) continue;

            let chunk: Record<string, unknown>;
            try { chunk = JSON.parse(payload); } catch { continue; }

            // Record usage before the candidates check so a usage-only chunk is not lost.
            const usageMeta = chunk.usageMetadata as Record<string, number> | undefined;
            if (usageMeta) {
              latestUsage = {
                inputTokens: usageMeta.promptTokenCount ?? 0,
                outputTokens: usageMeta.candidatesTokenCount ?? 0,
              };
            }

            const candidates = chunk.candidates as { content?: { parts?: unknown[] }; finishReason?: string }[] | undefined;
            if (!candidates?.length) continue;

            const parts = candidates[0].content?.parts as { text?: string; functionCall?: { name: string; args: unknown } }[] | undefined;
            if (parts) {
              for (const part of parts) {
                if (part.text) {
                  yield { type: "text_delta", text: part.text };
                }
                if (part.functionCall) {
                  const id = `call_${crypto.randomUUID().slice(0, 8)}`;
                  yield { type: "tool_call_start", id, name: part.functionCall.name };
                  yield { type: "tool_call_delta", arguments: JSON.stringify(part.functionCall.args ?? {}) };
                  yield { type: "tool_call_end" };
                }
              }
            }
          }
        }
        // Emit `done` once, at end of stream, instead of once on the finish chunk and again here.
        if (latestUsage) {
          yield { type: "done", usage: latestUsage };
        } else {
          yield { type: "done" };
        }
      } finally {
        reader.releaseLock();
      }
    },

    /**
     * Parse a non-streaming JSON response into the equivalent event list, using only the first
     * candidate: text and function-call parts in order, followed by a single `done`.
     */
    async parseResponse(response: Response): Promise<AdapterEvent[]> {
      const json = await response.json() as Record<string, unknown>;
      const events: AdapterEvent[] = [];

      const candidates = json.candidates as { content?: { parts?: { text?: string; functionCall?: { name: string; args: unknown } }[] } }[] | undefined;
      if (candidates?.[0]?.content?.parts) {
        for (const part of candidates[0].content.parts) {
          if (part.text) events.push({ type: "text_delta", text: part.text });
          if (part.functionCall) {
            const id = `call_${crypto.randomUUID().slice(0, 8)}`;
            events.push({ type: "tool_call_start", id, name: part.functionCall.name });
            events.push({ type: "tool_call_delta", arguments: JSON.stringify(part.functionCall.args ?? {}) });
            events.push({ type: "tool_call_end" });
          }
        }
      }

      const usage = json.usageMetadata as Record<string, number> | undefined;
      events.push({
        type: "done",
        usage: usage ? { inputTokens: usage.promptTokenCount ?? 0, outputTokens: usage.candidatesTokenCount ?? 0 } : undefined,
      });
      return events;
    },
  };
}