pi-antigravity-rotator 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,7 +2,11 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.1.0] - 2026-05-21
6
+
5
7
  ### Added
8
+ - **Codex Agent Integration Support**: Out-of-the-box support for connecting agentic frameworks like Codex (executing in VS Code or CLI) by routing OpenAI Responses API payloads and enabling native reasoning streaming, function-calling translation, and strict contract validation.
9
+ - **OpenAI Responses API Compatibility**: Full compatibility with the OpenAI Responses endpoint family (`POST /v1/responses`, `GET /v1/responses/<id>`, `DELETE /v1/responses/<id>`, `POST /v1/responses/<id>/cancel`, and `GET /v1/responses/<id>/input_items`). Includes full support for structured inputs, in-memory conversation/responses storage, and native tool-calling/reasoning visibility, tailored for advanced agentic frameworks.
6
10
  - **Hybrid Routing Policy**: Added optional `routingPolicy: "hybrid"` with weighted selection across timer priority, quota, tier, health, local token bucket state, and distance.
7
11
  - **Routing Inspector**: Added a dashboard modal that explains the currently selected route, candidate scores, and why each account was excluded for a model.
8
12
  - **Rate Limit Parser Module**: Extracted robust retry parsing into `src/rate-limit-parser.ts` with support for `Retry-After`, `x-ratelimit-reset`, `quotaResetDelay`, `quotaResetTimeStamp`, `retryDelay`, and duration strings.
package/README.md CHANGED
@@ -37,6 +37,7 @@ If this tool has helped you optimize your API usage and save costs, consider sup
37
37
 
38
38
  ## Features
39
39
 
40
+ - **Compatibility Adapters** -- Includes standard OpenAI-compatible `/v1/chat/completions` and Anthropic-compatible `/v1/messages` APIs. Features comprehensive **OpenAI Responses API compatibility** (`/v1/responses`), enabling seamless integration with advanced agentic systems like Codex.
40
41
  - **Per-model routing** -- Each model (Gemini Pro, Flash, Claude) routes to its own active account independently. Multiple agents using different models won't interfere with each other.
41
42
  - **Real-time quota monitoring** -- Polls Google's quota API every 5 minutes to track remaining usage per model per account
42
43
  - **Per-model timer tracking** -- Timer classification (`fresh`/`7d`/`5h`) is evaluated per model using each model's actual `resetTime` from the quota API, not a per-account estimate
@@ -379,6 +380,11 @@ Login now fails if Google does not return a project ID. No shared fallback.
379
380
  | `POST` | `/api/self-update` | Trigger npm self-update to latest version (admin-only) |
380
381
  | `POST` | `/v1internal:streamGenerateContent` | Native Antigravity proxy endpoint (used by pi) |
381
382
  | `GET` | `/v1/models` | OpenAI-compatible model list |
383
+ | `POST` | `/v1/responses` | OpenAI Responses-compatible create endpoint |
384
+ | `GET` | `/v1/responses/<id>` | Retrieve stored Responses result |
385
+ | `DELETE` | `/v1/responses/<id>` | Delete stored Responses result |
386
+ | `POST` | `/v1/responses/<id>/cancel` | Cancel an in-progress stored Responses result |
387
+ | `GET` | `/v1/responses/<id>/input_items` | List stored input items for a Responses result |
382
388
  | `POST` | `/v1/chat/completions` | OpenAI-compatible non-streaming chat adapter |
383
389
  | `POST` | `/v1/messages` | Anthropic-compatible non-streaming messages adapter |
384
390
 
@@ -400,6 +406,18 @@ curl http://localhost:51200/v1/chat/completions \
400
406
  }'
401
407
  ```
402
408
 
409
+ **OpenAI Responses-compatible example:**
410
+
411
+ ```bash
412
+ curl http://localhost:51200/v1/responses \
413
+ -H 'Content-Type: application/json' \
414
+ -d '{
415
+ "model": "gemini-3-flash",
416
+ "input": [{"role": "user", "content": [{"type": "input_text", "text": "Say pong"}]}],
417
+ "stream": false
418
+ }'
419
+ ```
420
+
403
421
  **Anthropic-compatible example:**
404
422
 
405
423
  ```bash
@@ -417,13 +435,56 @@ curl http://localhost:51200/v1/messages \
417
435
  Current adapter scope:
418
436
 
419
437
  - Text chat/messages.
438
+ - **Responses API compatibility**: Supports `POST /v1/responses` plus basic in-memory retrieve/delete/cancel/input-items endpoints for Codex-style agents.
420
439
  - **Model Role Support**: Fully supports the `"model"` role in chat message histories (e.g., from Pi or Hermes agents), validating and routing it identically to the `"assistant"` role.
421
440
  - **Request Normalization**: Automatically normalizes loose inputs (non-array messages), legacy prompt/input fields (e.g. `prompt` strings/arrays or `input` structures), and raw native Antigravity requests (`request.contents`) into standard OpenAI/Anthropic format.
422
441
  - **Native Reasoning visibility**: Models with thinking capabilities (Gemini 3 Pro, Gemini 3.5 Flash, Claude Sonnet 4.6 Thinking) automatically expose their interleaved thinking blocks in real-time as OpenAI `reasoning_content` or Anthropic `thinking_delta` chunks.
423
442
  - Streaming mode is supported as compatibility SSE. The adapter buffers the upstream Antigravity stream, then emits one OpenAI/Anthropic-compatible final delta. Native token-by-token pass-through is not implemented yet.
424
443
  - Image input is supported when sent as base64 data URL (`OpenAI image_url.url = data:image/...;base64,...`) or Anthropic base64 source (`type=image`, `source.type=base64`).
425
444
  - **Tool/function calling is fully supported** (OpenAI `tools`/`tool_choice` format and Anthropic `tool_use`/`tool_result` via standard translation to Gemini `functionDeclarations`).
445
+ - Responses-compatible tool support is currently limited to `type: "function"` tools. Built-in tools like `web_search`, `file_search`, `computer`, or `code_interpreter` are rejected explicitly.
446
+
447
+
448
+ ## Connecting Codex / VS Code Agents
449
+
450
+ `pi-antigravity-rotator` can act as the multi-account rotation backend for agentic frameworks, including **Codex** executing in VS Code or in the terminal.
451
+
452
+ Since Codex uses the standard **OpenAI Responses API**, it can seamlessly route its developer-agent workflows through the rotator.
453
+
454
+ ### Configuration for Codex
455
+
456
+ To connect Codex to your local rotator:
457
+
458
+ 1. **Configure the API Base URL**:
459
+ In Codex settings (e.g. in your `.codex` config or VS Code configuration), set the OpenAI API base URL to point to your rotator's compatibility adapter:
460
+ ```json
461
+ "codex.openai.apiBase": "http://localhost:51200/v1"
462
+ ```
463
+ *(Or set the environment variable `OPENAI_BASE_URL=http://localhost:51200/v1` in the workspace shell).*
464
+
465
+ 2. **Set the API Key / Admin Token**:
466
+ If you have set a `PI_ROTATOR_ADMIN_TOKEN` for your rotator, configure that token as the API key. Otherwise, any non-empty placeholder string (e.g., `sk-antigravity`) works:
467
+ ```json
468
+ "codex.openai.apiKey": "your-rotator-admin-token-here"
469
+ ```
470
+ *(Or set the environment variable `OPENAI_API_KEY=...` in the shell).*
471
+
472
+ 3. **Select a Supported Model**:
473
+ Configure Codex to target one of the following models supported by the rotator (which will be mapped to the best available Google Antigravity account/model under the hood):
474
+ - `gemini-3.5-flash` or `gemini-3.5-flash-high` / `gemini-3.5-flash-low` (Recommended for fast general reasoning)
475
+ - `gemini-3-pro` or `gemini-pro-agent` (For deep reasoning)
476
+ - `claude-sonnet-4-6` or `claude-3-5-sonnet` (Alternative routing fallback)
477
+
478
+ Example Codex configuration entry:
479
+ ```json
480
+ "codex.model": "gemini-3.5-flash-high"
481
+ ```
482
+
483
+ ### Features Enabled for Codex Agents
426
484
 
485
+ - **Native Reasoning Visibility**: If using models with thinking enabled (e.g., `gemini-3.5-flash-high`), interleaved reasoning/thinking blocks are streamed back in real-time as OpenAI `reasoning_content` chunks. This lets Codex inspect the model's inner thoughts before it acts.
486
+ - **Function / Tool Routing**: Function tools declared by Codex are translated to Gemini `functionDeclarations`, and the function calls the model emits are returned to Codex as Responses `function_call` output items, enabling full agentic capabilities.
487
+ - **Strict Validation**: The rotator strictly validates the Responses input contract and rejects unsupported tools (e.g., `web_search`) proactively to ensure Codex doesn't hit unexpected runtime exceptions.
427
488
 
428
489
  ## Development Checks
429
490
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-antigravity-rotator",
3
- "version": "2.0.0",
3
+ "version": "2.1.0",
4
4
  "description": "Multi-account rotation proxy for Google Antigravity with per-model routing, real-time quota tracking, and infringement detection",
5
5
  "license": "MIT",
6
6
  "type": "module",
package/src/compat.ts CHANGED
@@ -69,6 +69,24 @@ export interface OpenAIChatCompletionRequest {
69
69
  [key: string]: unknown;
70
70
  }
71
71
 
72
/**
 * Body of an OpenAI Responses-compatible create request.
 *
 * Only `model` is required. Keys not listed here are still accepted through
 * the index signature and carried along as `unknown`.
 */
export interface OpenAIResponsesRequest {
  /** Model identifier requested by the client. */
  model: string;
  /** Conversation input: a plain string or an array of input items. */
  input?: unknown;
  /** System-level instructions, as text or as an array of typed content parts. */
  instructions?: string | { type: string; text?: string; [key: string]: unknown }[] | null;
  /** Whether the client asked for a streamed response. */
  stream?: boolean;
  temperature?: number;
  max_output_tokens?: number;
  /** Tool definitions supplied by the client. */
  tools?: Record<string, unknown>[];
  tool_choice?: unknown;
  /** Reasoning options; `effort` is forwarded only when it is a string. */
  reasoning?: { effort?: string | null; [key: string]: unknown } | null;
  metadata?: Record<string, string>;
  /** Echoed back on the response object; treated as enabled unless explicitly `false`. */
  store?: boolean;
  /** Id of an earlier stored response whose conversation should be continued. */
  previous_response_id?: string | null;
  /** Rejected by request validation when set to a non-null value. */
  conversation?: unknown;
  parallel_tool_calls?: boolean;
  [key: string]: unknown;
}
89
+
72
90
  export interface AnthropicMessagesRequest {
73
91
  model: string;
74
92
  messages: ChatMessage[];
@@ -88,6 +106,39 @@ export interface CompatCompletion {
88
106
  toolCalls?: OpenAIToolCall[];
89
107
  }
90
108
 
109
/** A single text part inside an assistant message output item. */
interface ResponseOutputText {
  type: "output_text";
  text: string;
  annotations: unknown[];
}

/** Output item carrying the assistant's text reply. */
interface ResponseMessageOutputItem {
  id: string;
  type: "message";
  status: "completed";
  role: "assistant";
  content: ResponseOutputText[];
}

/** Output item describing one function call requested by the model. */
interface ResponseFunctionCallOutputItem {
  id: string;
  type: "function_call";
  /** Identifier the client echoes back in the matching `function_call_output`. */
  call_id: string;
  name: string;
  /** Call arguments, kept as a string. */
  arguments: string;
  status: "completed";
}

/** Any item that may appear in a Responses `output` array. */
type ResponseOutputItem =
  | ResponseMessageOutputItem
  | ResponseFunctionCallOutputItem;
133
+
134
/** One record kept in the in-memory Responses store. */
interface StoredResponseEntry {
  /** The response object as it was returned to the client. */
  response: Record<string, unknown>;
  /** Input items parsed from the request that produced this response. */
  inputItems: Array<Record<string, unknown>>;
  /** Conversation so far, used to continue via `previous_response_id`. */
  conversationMessages: ChatMessage[];
  /** Tool call id → function name, for resolving later `function_call_output` items. */
  callIdToName: Map<string, string>;
  /** Epoch milliseconds after which the entry is pruned. */
  expiresAt: number;
}
141
+
91
142
  // ---------------------------------------------------------------------------
92
143
  // Model-specific specs — mirrors Antigravity-Manager model_specs.json
93
144
  // ---------------------------------------------------------------------------
@@ -106,6 +157,7 @@ const MODEL_SPECS: Record<string, ModelSpec> = {
106
157
  "gemini-3.1-pro-low": { maxOutputTokens: 65535, thinkingBudget: 1001, isThinking: true },
107
158
  "gemini-3.1-pro-preview": { maxOutputTokens: 65535, thinkingBudget: 10001, isThinking: true },
108
159
  "gemini-3.5-flash": { maxOutputTokens: 65536, thinkingBudget: 10000, isThinking: true },
160
+ "gemini-3.5-flash-medium": { maxOutputTokens: 65536, thinkingBudget: 4000, isThinking: true },
109
161
  "gemini-3.5-flash-low": { maxOutputTokens: 65536, thinkingBudget: 4000, isThinking: true },
110
162
  "gemini-3.5-flash-high": { maxOutputTokens: 65536, thinkingBudget: 10000, isThinking: true },
111
163
  "gemini-3-flash": { maxOutputTokens: 65536, thinkingBudget: 4000, isThinking: true },
@@ -176,6 +228,39 @@ function isNonEmptyString(value: unknown): value is string {
176
228
  */
177
229
  const thoughtSignatureCache = new Map<string, string>();
178
230
  const THOUGHT_SIGNATURE_CACHE_MAX = 500;
231
/** Lifetime of a stored Responses entry: six hours, in milliseconds. */
const RESPONSES_STORE_TTL_MS = 6 * 60 * 60 * 1000;
/** Maximum number of entries kept in the in-memory store. */
const RESPONSES_STORE_MAX = 500;
/** In-memory store keyed by response id; Map insertion order doubles as eviction order. */
const responsesStore = new Map<string, StoredResponseEntry>();

/**
 * Build an identifier of the form `<prefix>_<base36 timestamp><base36 random suffix>`.
 * The random suffix comes from `Math.random()` and is at most eight characters long.
 */
function makeCompatId(prefix: string): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 10);
  return `${prefix}_${timePart}${randomPart}`;
}

/**
 * Drop every entry whose `expiresAt` is at or before `now`, then evict the
 * earliest-inserted entries until the store holds at most RESPONSES_STORE_MAX.
 */
function pruneResponsesStore(now = Date.now()): void {
  responsesStore.forEach((entry, id) => {
    if (entry.expiresAt <= now) responsesStore.delete(id);
  });
  let excess = responsesStore.size - RESPONSES_STORE_MAX;
  for (const id of responsesStore.keys()) {
    if (excess <= 0) break;
    responsesStore.delete(id);
    excess -= 1;
  }
}

/** Look up a stored entry after pruning; returns `null` when the id is absent or expired. */
function getStoredResponse(id: string): StoredResponseEntry | null {
  pruneResponsesStore();
  const entry = responsesStore.get(id);
  return entry ? entry : null;
}

/** Store an entry, pruning before and after so the size cap holds once the insert is done. */
function setStoredResponse(id: string, entry: StoredResponseEntry): void {
  pruneResponsesStore();
  responsesStore.set(id, entry);
  pruneResponsesStore();
}

/** Empty the store; exported for test isolation. */
export function resetResponsesStoreForTests(): void {
  responsesStore.clear();
}
179
264
 
180
265
  function cacheThoughtSignature(callId: string, signature: string): void {
181
266
  if (thoughtSignatureCache.size >= THOUGHT_SIGNATURE_CACHE_MAX) {
@@ -486,6 +571,103 @@ function normalizeContentBlocks(content: unknown): ChatMessage["content"] {
486
571
  return blocks.length > 0 ? blocks : "";
487
572
  }
488
573
 
574
/**
 * Turn the Responses `instructions` field into chat-message content.
 * Strings pass through unchanged, `null`/`undefined` become an empty string,
 * and anything else is handed to `normalizeContentBlocks`.
 */
function normalizeInstructionsContent(content: OpenAIResponsesRequest["instructions"]): ChatMessage["content"] {
  if (content === null || content === undefined) return "";
  if (typeof content === "string") return content;
  return normalizeContentBlocks(content);
}
578
+
579
/**
 * Convert chat-message content into Responses-style content blocks.
 *
 * Text is tagged `output_text` for the `assistant`/`model` roles and
 * `input_text` for every other role. `image_url` parts become `input_image`
 * blocks, other object parts pass through unchanged, and non-object parts
 * are dropped. Empty strings and non-array, non-string content yield `[]`.
 */
function contentToResponseInputBlocks(content: ChatMessage["content"], role: string): Array<Record<string, unknown>> {
  const textType = role === "assistant" || role === "model" ? "output_text" : "input_text";

  if (typeof content === "string") {
    return content ? [{ type: textType, text: content }] : [];
  }
  if (!Array.isArray(content)) return [];

  const blocks: Array<Record<string, unknown>> = [];
  for (const part of cleanCacheControl(content)) {
    if (!isRecord(part)) continue;
    if (typeof part.text === "string") {
      blocks.push({ type: textType, text: part.text });
    } else if (part.type === "image_url" && isRecord(part.image_url) && typeof part.image_url.url === "string") {
      blocks.push({ type: "input_image", image_url: part.image_url.url });
    } else {
      blocks.push(part);
    }
  }
  return blocks;
}
596
+
597
type ParsedResponsesInput = {
  inputItems: Array<Record<string, unknown>>;
  messages: ChatMessage[];
};

/**
 * Parse a Responses API `input` value into stored input items and the
 * equivalent chat messages.
 *
 * Accepted shapes:
 * - a plain string → one user message;
 * - an array whose entries are strings, `function_call` items,
 *   `function_call_output` items, or message-like objects.
 * Any other `input` value, and any unrecognised array entry, is ignored.
 *
 * @param input Raw `input` value from the request body.
 * @param callIdToName Tool call id → function name known from a previous
 *   response. It is only read; names learned while parsing are kept in a
 *   local copy so the caller's map is never mutated.
 * @returns The normalized input items and the chat messages derived from them.
 */
function parseResponsesInput(input: unknown, callIdToName: Map<string, string> = new Map()): ParsedResponsesInput {
  if (typeof input === "string") {
    return {
      inputItems: [{ id: makeCompatId("in"), type: "message", role: "user", content: [{ type: "input_text", text: input }] }],
      messages: [{ role: "user", content: input }],
    };
  }
  if (!Array.isArray(input)) return { inputItems: [], messages: [] };

  const inputItems: Array<Record<string, unknown>> = [];
  const messages: ChatMessage[] = [];
  // Clients that resend the whole history put a `function_call` and its
  // `function_call_output` in the same array. Track names seen in this request
  // so such outputs resolve to the real function name instead of "unknown".
  const knownToolNames = new Map(callIdToName);

  for (const rawItem of input) {
    if (typeof rawItem === "string") {
      inputItems.push({ id: makeCompatId("in"), type: "message", role: "user", content: [{ type: "input_text", text: rawItem }] });
      messages.push({ role: "user", content: rawItem });
      continue;
    }
    if (!isRecord(rawItem)) continue;

    if (rawItem.type === "function_call_output" && typeof rawItem.call_id === "string") {
      // Non-string outputs are serialized so the tool message content is always text.
      const outputText = typeof rawItem.output === "string" ? rawItem.output : JSON.stringify(rawItem.output ?? "");
      // An explicit name on the item wins; otherwise fall back to the id lookup.
      const toolName = typeof rawItem.name === "string" ? rawItem.name : (knownToolNames.get(rawItem.call_id) || "unknown");
      inputItems.push({
        id: makeCompatId("in"),
        type: "function_call_output",
        call_id: rawItem.call_id,
        output: outputText,
      });
      messages.push({ role: "tool", content: outputText, name: toolName, tool_call_id: rawItem.call_id });
      continue;
    }

    if (rawItem.type === "function_call" && typeof rawItem.name === "string") {
      const callId = typeof rawItem.call_id === "string" ? rawItem.call_id : makeCompatId("call");
      const args = typeof rawItem.arguments === "string" ? rawItem.arguments : JSON.stringify(rawItem.arguments ?? {});
      knownToolNames.set(callId, rawItem.name);
      inputItems.push({
        id: makeCompatId("in"),
        type: "function_call",
        call_id: callId,
        name: rawItem.name,
        arguments: args,
      });
      messages.push({
        role: "assistant",
        content: null,
        tool_calls: [{ id: callId, type: "function", function: { name: rawItem.name, arguments: args } }],
      });
      continue;
    }

    // Anything with a message type, a role, or a content field is treated as a message.
    const isMessage = rawItem.type === "message" || typeof rawItem.role === "string" || "content" in rawItem;
    if (!isMessage) continue;
    const rawRole = typeof rawItem.role === "string" ? rawItem.role : "user";
    // The Responses "developer" role is mapped onto the chat "system" role.
    const role = rawRole === "developer" ? "system" : rawRole;
    if (!["system", "user", "assistant", "model", "tool"].includes(role)) continue;
    const content = "content" in rawItem ? normalizeContentBlocks(rawItem.content) : extractTextFromUnknownContent(rawItem);
    inputItems.push({
      id: makeCompatId("in"),
      type: "message",
      role: rawRole,
      content: contentToResponseInputBlocks(content, role),
    });
    messages.push({ role: role as ChatMessage["role"], content });
  }

  return { inputItems, messages };
}
670
+
489
671
  function messagesFromResponsesInput(input: unknown): ChatMessage[] | null {
490
672
  if (typeof input === "string") return [{ role: "user", content: input }];
491
673
  if (!Array.isArray(input)) return null;
@@ -545,6 +727,55 @@ export function normalizeOpenAIChatCompletionRequest(value: unknown): unknown {
545
727
  return messages ? { ...value, messages } : value;
546
728
  }
547
729
 
730
/**
 * Normalize a raw Responses request body before validation.
 *
 * Non-object bodies are returned untouched. For objects:
 * - `tools` (when an array) is reduced to named function tools. Tools may arrive
 *   in the standard layout `{ type: "function", function: { name, ... } }` or in
 *   the flat layout `{ type: "function", name, description, parameters }`. Flat
 *   entries are lifted into the standard layout; non-function entries and
 *   function entries without a usable name are dropped, with a warning logged.
 *   If nothing survives, `tools` is set to `undefined`.
 * - when `input` is absent, `messages` and then `prompt` are used as `input`.
 */
export function normalizeOpenAIResponsesRequest(value: unknown): unknown {
  if (!isRecord(value)) return value;

  const normalized: Record<string, unknown> = { ...value };

  if (Array.isArray(value.tools)) {
    const kept: unknown[] = [];
    for (const tool of value.tools) {
      if (!isRecord(tool) || tool.type !== "function") continue;

      // Standard layout: keep as-is, but only when .function.name is usable.
      if (isRecord(tool.function)) {
        if (isNonEmptyString(tool.function.name)) kept.push(tool);
        continue;
      }

      // Flat layout: name lives at the root, so lift it into a .function wrapper.
      if (isNonEmptyString(tool.name)) {
        const fn: Record<string, unknown> = { name: tool.name };
        if (typeof tool.description === "string") fn.description = tool.description;
        if (isRecord(tool.parameters)) fn.parameters = tool.parameters;
        kept.push({ type: "function", function: fn });
      }
      // Otherwise: a function entry with no usable name is dropped.
    }

    const dropped = value.tools.length - kept.length;
    if (dropped > 0) {
      compatLogger.warn(`Filtered ${dropped} unsupported/unnamed tool(s) from Responses request (kept ${kept.length} function tools)`);
    }
    normalized.tools = kept.length > 0 ? kept : undefined;
  }

  if (!("input" in normalized)) {
    for (const legacyKey of ["messages", "prompt"]) {
      if (legacyKey in normalized) return { ...normalized, input: normalized[legacyKey] };
    }
  }
  return normalized;
}
778
+
548
779
  export function normalizeAnthropicMessagesRequest(value: unknown): unknown {
549
780
  if (!isRecord(value) || Array.isArray(value.messages)) return value;
550
781
  const messages = "messages" in value
@@ -555,6 +786,22 @@ export function normalizeAnthropicMessagesRequest(value: unknown): unknown {
555
786
  return messages ? { ...value, messages } : value;
556
787
  }
557
788
 
789
/**
 * Validate the `tools` field of a Responses request.
 *
 * @param value The raw `tools` value; `undefined` means "not provided".
 * @returns One error message per problem found; an empty array when the value
 *   is absent or every entry is an object with `type: "function"`.
 */
function validateResponsesTools(value: unknown): string[] {
  if (value === undefined) return [];
  if (!Array.isArray(value)) return ["body.tools must be an array when provided"];
  const errors: string[] = [];
  for (const tool of value) {
    if (!isRecord(tool)) {
      errors.push("each tool must be an object");
      continue;
    }
    if (tool.type !== "function") {
      // Interpolating a non-string directly would print "[object Object]" for
      // object values; render those as JSON so the client can see what it sent.
      // String and undefined values print exactly as before.
      const shownType = typeof tool.type === "string"
        ? tool.type
        : (JSON.stringify(tool.type) ?? String(tool.type));
      errors.push(`only function tools are supported (got: ${shownType})`);
    }
  }
  return errors;
}
804
+
558
805
  export function validateOpenAIChatCompletionRequest(value: unknown): { ok: true; value: OpenAIChatCompletionRequest } | { ok: false; errors: string[] } {
559
806
  if (!isRecord(value)) return { ok: false, errors: ["body must be a JSON object"] };
560
807
  const errors: string[] = [];
@@ -569,6 +816,33 @@ export function validateOpenAIChatCompletionRequest(value: unknown): { ok: true;
569
816
  return errors.length > 0 ? { ok: false, errors } : { ok: true, value: value as unknown as OpenAIChatCompletionRequest };
570
817
  }
571
818
 
819
/**
 * Validate a (normalized) Responses request body.
 *
 * Checks are shallow type checks on the known fields; every failing check adds
 * one message, so a single call reports all problems at once. `null` is
 * accepted wherever the field is treated as nullable: `previous_response_id`,
 * `conversation`, `metadata`, `reasoning`, `reasoning.effort`, `instructions`.
 *
 * @param value Parsed JSON body.
 * @returns `{ ok: true, value }` with the same object typed as a request, or
 *   `{ ok: false, errors }` listing every violated rule.
 */
export function validateOpenAIResponsesRequest(value: unknown): { ok: true; value: OpenAIResponsesRequest } | { ok: false; errors: string[] } {
  if (!isRecord(value)) return { ok: false, errors: ["body must be a JSON object"] };
  const errors: string[] = [];
  if (!isNonEmptyString(value.model)) errors.push("body.model must be a non-empty string");
  if (value.stream !== undefined && typeof value.stream !== "boolean") errors.push("body.stream must be boolean when provided");
  if (value.temperature !== undefined && typeof value.temperature !== "number") errors.push("body.temperature must be number when provided");
  if (value.max_output_tokens !== undefined && typeof value.max_output_tokens !== "number") errors.push("body.max_output_tokens must be number when provided");
  if (value.store !== undefined && typeof value.store !== "boolean") errors.push("body.store must be boolean when provided");
  if (value.previous_response_id !== undefined && value.previous_response_id !== null && !isNonEmptyString(value.previous_response_id)) {
    errors.push("body.previous_response_id must be a non-empty string or null");
  }
  // Server-side conversations are not implemented; only an absent/null value passes.
  if (value.conversation !== undefined && value.conversation !== null) {
    errors.push("body.conversation is not supported; use previous_response_id instead");
  }
  // `metadata: null` is treated like an omitted field, matching the other nullable
  // fields here; the response builder already falls back to `{}` for non-objects.
  if (value.metadata !== undefined && value.metadata !== null && !isRecord(value.metadata)) {
    errors.push("body.metadata must be an object when provided");
  }
  if (value.reasoning !== undefined && value.reasoning !== null && !isRecord(value.reasoning)) {
    errors.push("body.reasoning must be an object when provided");
  } else if (isRecord(value.reasoning) && value.reasoning.effort !== undefined && value.reasoning.effort !== null && typeof value.reasoning.effort !== "string") {
    errors.push("body.reasoning.effort must be a string when provided");
  }
  if (value.instructions !== undefined && value.instructions !== null && typeof value.instructions !== "string" && !Array.isArray(value.instructions)) {
    errors.push("body.instructions must be a string or content array when provided");
  }
  errors.push(...validateResponsesTools(value.tools));
  return errors.length > 0 ? { ok: false, errors } : { ok: true, value: value as unknown as OpenAIResponsesRequest };
}
845
+
572
846
  export function validateAnthropicMessagesRequest(value: unknown): { ok: true; value: AnthropicMessagesRequest } | { ok: false; errors: string[] } {
573
847
  if (!isRecord(value)) return { ok: false, errors: ["body must be a JSON object"] };
574
848
  const errors: string[] = [];
@@ -583,6 +857,154 @@ export function validateAnthropicMessagesRequest(value: unknown): { ok: true; va
583
857
 
584
858
  type GeminiContent = { role: "user" | "model"; parts: unknown[] };
585
859
 
860
type ResponsesConversionResult = {
  chatRequest: OpenAIChatCompletionRequest;
  inputItems: Array<Record<string, unknown>>;
  conversationMessages: ChatMessage[];
  previousResponseId: string | null;
};

/**
 * Translate a validated Responses request into a chat-completions request.
 *
 * When `previous_response_id` is set, the stored conversation of that response
 * is prepended to the newly parsed input. `instructions`, when truthy, becomes
 * a leading system message of the chat request only; it is not part of the
 * returned `conversationMessages`.
 *
 * @throws Error when `previous_response_id` names a response that is not in the store.
 */
function convertResponsesToChatRequest(input: OpenAIResponsesRequest): ResponsesConversionResult {
  const previousResponseId = input.previous_response_id ?? null;

  let previous: StoredResponseEntry | null = null;
  if (previousResponseId) {
    previous = getStoredResponse(previousResponseId);
    if (!previous) {
      throw new Error(`previous_response_id not found: ${previousResponseId}`);
    }
  }

  const parsed = parseResponsesInput(input.input, previous?.callIdToName);
  const earlierMessages: ChatMessage[] = previous?.conversationMessages ?? [];
  const conversationMessages: ChatMessage[] = earlierMessages.concat(parsed.messages);

  const chatMessages: ChatMessage[] = [];
  if (input.instructions) {
    chatMessages.push({ role: "system", content: normalizeInstructionsContent(input.instructions) });
  }
  chatMessages.push(...conversationMessages);

  const effort = input.reasoning?.effort;
  const chatRequest: OpenAIChatCompletionRequest = {
    model: input.model,
    messages: chatMessages,
    stream: input.stream,
    temperature: input.temperature,
    max_tokens: input.max_output_tokens,
    tools: input.tools as OpenAITool[] | undefined,
    tool_choice: input.tool_choice,
    reasoning_effort: typeof effort === "string" ? effort : undefined,
    parallel_tool_calls: input.parallel_tool_calls,
  };

  return {
    chatRequest,
    inputItems: parsed.inputItems,
    conversationMessages,
    previousResponseId,
  };
}
901
+
902
/**
 * Build the Responses `usage` object from a completion's token counts.
 * Cached-token and reasoning-token details are always reported as 0.
 */
function responseUsageFromCompletion(completion: CompatCompletion): Record<string, unknown> {
  const { inputTokens, outputTokens } = completion;
  const usage: Record<string, unknown> = {
    input_tokens: inputTokens,
    input_tokens_details: { cached_tokens: 0 },
    output_tokens: outputTokens,
    output_tokens_details: { reasoning_tokens: 0 },
    total_tokens: inputTokens + outputTokens,
  };
  return usage;
}
911
+
912
/**
 * Turn a completion into Responses output items.
 *
 * Items are emitted in a fixed order: an optional `reasoning` item (when the
 * completion has thinking text), an optional assistant `message` item (when it
 * has text), then one `function_call` item per tool call.
 *
 * @returns The output items, the plain completion text, and a map from each
 *   tool call id to its function name.
 */
function buildResponsesOutput(completion: CompatCompletion): { output: ResponseOutputItem[]; outputText: string; callIdToName: Map<string, string> } {
  const callIdToName = new Map<string, string>();
  const output: ResponseOutputItem[] = [];

  // Reasoning goes first so it precedes the message text it led to.
  if (completion.thinkingText) {
    const reasoningItem = {
      id: makeCompatId("rs"),
      type: "reasoning",
      status: "completed",
      summary: [{ type: "summary_text", text: completion.thinkingText }],
    };
    // The reasoning shape is not part of the ResponseOutputItem union, hence the cast.
    output.push(reasoningItem as unknown as ResponseOutputItem);
  }

  if (completion.text) {
    output.push({
      id: makeCompatId("msg"),
      type: "message",
      status: "completed",
      role: "assistant",
      content: [{ type: "output_text", text: completion.text, annotations: [] }],
    });
  }

  (completion.toolCalls ?? []).forEach((call) => {
    const { name, arguments: args } = call.function;
    callIdToName.set(call.id, name);
    output.push({
      id: makeCompatId("fc"),
      type: "function_call",
      call_id: call.id,
      name,
      arguments: args,
      status: "completed",
    });
  });

  return { output, outputText: completion.text, callIdToName };
}
946
+
947
/**
 * Represent a completion as the assistant chat message to append to the
 * stored conversation. With tool calls present the message carries them and
 * its content is the text or `null` when the text is empty; otherwise the
 * content is the text as-is.
 */
function buildAssistantMessageFromCompletion(completion: CompatCompletion): ChatMessage {
  const calls = completion.toolCalls;
  if (calls && calls.length > 0) {
    return { role: "assistant", content: completion.text || null, tool_calls: calls };
  }
  return { role: "assistant", content: completion.text };
}
952
+
953
/**
 * Assemble the Responses API response object for a completion.
 *
 * Request fields are echoed back with fixed fallbacks where the request left
 * them unset (`parallel_tool_calls` → true, `tool_choice` → "auto",
 * `tools` → [], `metadata` → {}, the rest → null). `store` is reported as true
 * unless the request set it to `false`.
 *
 * @param request The validated request that produced the completion.
 * @param responseId Identifier to report as the response `id`.
 * @param createdAt Value to report as `created_at`.
 * @param completion Completion whose text, tool calls and token counts fill the response.
 * @param status Lifecycle status to report.
 * @param previousResponseId Id of the response this one continues, or `null`.
 */
function buildResponsesResponse(
  request: OpenAIResponsesRequest,
  responseId: string,
  createdAt: number,
  completion: CompatCompletion,
  status: "in_progress" | "completed" | "cancelled",
  previousResponseId: string | null,
): Record<string, unknown> {
  const built = buildResponsesOutput(completion);
  const echoedTools = Array.isArray(request.tools) ? request.tools : [];
  const echoedMetadata = isRecord(request.metadata) ? request.metadata : {};
  const storeEnabled = request.store !== false;

  const response: Record<string, unknown> = {
    id: responseId,
    object: "response",
    created_at: createdAt,
    status,
    error: null,
    incomplete_details: null,
    instructions: request.instructions ?? null,
    max_output_tokens: request.max_output_tokens ?? null,
    model: request.model,
    output: built.output,
    output_text: built.outputText,
    parallel_tool_calls: request.parallel_tool_calls ?? true,
    previous_response_id: previousResponseId,
    reasoning: { effort: request.reasoning?.effort ?? null },
    store: storeEnabled,
    temperature: request.temperature ?? null,
    text: { format: { type: "text" } },
    tool_choice: request.tool_choice ?? "auto",
    tools: echoedTools,
    top_p: null,
    truncation: "disabled",
    usage: responseUsageFromCompletion(completion),
    metadata: echoedMetadata,
  };
  return response;
}
988
+
989
/**
 * Persist a response in the in-memory store under its own `id`.
 *
 * The stored conversation is the given messages plus the assistant message
 * derived from the completion, so a later request can continue from it. The
 * entry expires RESPONSES_STORE_TTL_MS after the moment it is saved. Nothing
 * is stored when `response.id` is not a non-empty string.
 */
function saveResponsesEntry(
  response: Record<string, unknown>,
  inputItems: Array<Record<string, unknown>>,
  conversationMessages: ChatMessage[],
  completion: CompatCompletion,
): void {
  const { id } = response;
  if (typeof id !== "string" || !id) return;

  const toolNames = buildResponsesOutput(completion).callIdToName;
  const assistantTurn = buildAssistantMessageFromCompletion(completion);
  const entry: StoredResponseEntry = {
    response,
    inputItems,
    conversationMessages: conversationMessages.concat([assistantTurn]),
    callIdToName: toolNames,
    expiresAt: Date.now() + RESPONSES_STORE_TTL_MS,
  };
  setStoredResponse(id, entry);
}
1007
+
586
1008
  export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): RequestBody {
587
1009
  // Separate system messages from conversation turns
588
1010
  const systemParts: string[] = [];
@@ -674,7 +1096,14 @@ export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): Req
674
1096
  // Include tool_call_id so Gemini can pass it as tool_use_id to Claude
675
1097
  const toolCallId = msg.tool_call_id;
676
1098
  let responseData: unknown;
677
- try { responseData = JSON.parse(responseText); } catch { responseData = { output: responseText }; }
1099
+ try {
1100
+ const parsed = JSON.parse(responseText);
1101
+ // Cloud Code proto requires functionResponse.response to be an object, not an array.
1102
+ // Wrap arrays (and other non-object primitives) so the field is always a plain object.
1103
+ responseData = (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed))
1104
+ ? parsed
1105
+ : { output: parsed };
1106
+ } catch { responseData = { output: responseText }; }
678
1107
  // Include id only for Claude — Gemini native models reject the id field in functionResponse
679
1108
  contents.push({ role: "user", parts: [{ functionResponse: { ...(isClaude && toolCallId ? { id: toolCallId } : {}), name: fnName, response: responseData } }] });
680
1109
  } else {
@@ -767,7 +1196,7 @@ export function openAIToAntigravityBody(input: OpenAIChatCompletionRequest): Req
767
1196
 
768
1197
  let mappedModel = input.model;
769
1198
  if (mappedModel === "gemini-3.1-pro-high") mappedModel = "gemini-pro-agent";
770
- if (mappedModel === "gemini-3.5-flash-high" || mappedModel === "gemini-3.5-flash") mappedModel = "gemini-3-flash-agent";
1199
+ if (mappedModel === "gemini-3.5-flash-high" || mappedModel === "gemini-3.5-flash" || mappedModel === "gemini-3.5-flash-medium") mappedModel = "gemini-3-flash-agent";
771
1200
  if (mappedModel === "gpt-oss-120b") mappedModel = "gpt-oss-120b-medium";
772
1201
 
773
1202
  return {
@@ -927,6 +1356,10 @@ function writeJson(res: ServerResponse, status: number, payload: unknown, header
927
1356
  res.end(JSON.stringify(payload));
928
1357
  }
929
1358
 
1359
/**
 * Writes one Responses API streaming event to the client as a single
 * data-only SSE frame: `data: <json>` followed by a blank line.
 *
 * @param res Client response the frame is written to.
 * @param payload Event object; serialized with `JSON.stringify`.
 */
function writeResponsesEvent(res: ServerResponse, payload: Record<string, unknown>): void {
  const frame = "data: " + JSON.stringify(payload) + "\n\n";
  res.write(frame);
}
1362
+
930
1363
  function summarizeCompatRequest(body: RequestBody): string {
931
1364
  const request = isRecord(body.request) ? body.request : {};
932
1365
  const contents = Array.isArray(request.contents) ? request.contents : [];
@@ -1148,6 +1581,246 @@ async function streamCompatSse(
1148
1581
  return { text, inputTokens, outputTokens, responseId, toolCalls: undefined };
1149
1582
  }
1150
1583
 
1584
/**
 * Streams an upstream SSE body to the client as OpenAI Responses API events
 * and returns the completion accumulated along the way.
 *
 * Writes the SSE headers, emits `response.created` and `response.in_progress`,
 * then translates every upstream `data:` line:
 *  - thought text   -> a reasoning output item + `response.reasoning_summary_text.delta`
 *  - ordinary text  -> a message output item + `response.output_text.delta`
 *  - `functionCall` -> a complete function_call output item (added/delta/done/done)
 * When the upstream ends it closes any open items, emits `response.completed`
 * and ends `res`, unless the client has already gone away.
 *
 * @param body Upstream response body (a web `ReadableStream`).
 * @param req Incoming client request, watched for disconnects.
 * @param res Client response the events are written to.
 * @param request Original Responses request, passed to the response builder.
 * @param responseId Id of the response being produced.
 * @param previousResponseId Id of the preceding response, or `null`.
 * @param createdAt Creation timestamp passed to the response builder.
 * @returns Accumulated text, reasoning text, token counts and tool calls.
 */
async function streamResponsesSse(
  body: unknown,
  req: IncomingMessage,
  res: ServerResponse,
  request: OpenAIResponsesRequest,
  responseId: string,
  previousResponseId: string | null,
  createdAt: number,
): Promise<CompatCompletion> {
  const nodeStream = Readable.fromWeb(body as import("node:stream/web").ReadableStream);
  // Streaming decoder: a multi-byte UTF-8 character split across two chunks must
  // not be decoded per chunk, or it turns into replacement characters.
  const decoder = new TextDecoder();
  let text = "";
  let thinkingText = "";
  let inputTokens = 0;
  let outputTokens = 0;
  const toolCalls: OpenAIToolCall[] = [];
  let toolCallIndex = 0;
  let nextOutputIndex = 0;
  let messageOutputIndex = -1;
  let messageItemId = "";
  let reasoningOutputIndex = -1;
  let reasoningItemId = "";
  let reasoningDone = false;
  let reqClosed = false;
  const markClosed = (): void => { reqClosed = true; };
  req.once("close", markClosed);
  // Also watch the response: on current Node releases the request's "close" is
  // emitted once the request has completed, so it may already have fired by the
  // time streaming starts. "close" on `res` before `res.end()` means the client left.
  res.once("close", markClosed);

  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no" });
  const emptyCompletion: CompatCompletion = { text: "", thinkingText: undefined, inputTokens: 0, outputTokens: 0, toolCalls: [] };
  writeResponsesEvent(res, { type: "response.created", response: buildResponsesResponse(request, responseId, createdAt, emptyCompletion, "in_progress", previousResponseId) });
  writeResponsesEvent(res, { type: "response.in_progress", response: buildResponsesResponse(request, responseId, createdAt, emptyCompletion, "in_progress", previousResponseId) });

  // Emits the two "done" events for the reasoning item, at most once.
  const closeReasoningItem = (): void => {
    if (reasoningOutputIndex === -1 || reasoningDone) return;
    reasoningDone = true;
    writeResponsesEvent(res, {
      type: "response.reasoning_summary_text.done",
      item_id: reasoningItemId,
      output_index: reasoningOutputIndex,
      summary_index: 0,
      text: thinkingText,
    });
    writeResponsesEvent(res, {
      type: "response.output_item.done",
      output_index: reasoningOutputIndex,
      item: {
        id: reasoningItemId,
        type: "reasoning",
        status: "completed",
        summary: [{ type: "summary_text", text: thinkingText }],
      },
    });
  };

  // Translates one upstream content part into Responses events.
  const handlePart = (part: Record<string, unknown>): void => {
    if (typeof part.text === "string" && part.text) {
      if (part.thought === true) {
        // First thought chunk: open the reasoning output item.
        if (reasoningOutputIndex === -1) {
          reasoningOutputIndex = nextOutputIndex++;
          reasoningItemId = makeCompatId("rs");
          writeResponsesEvent(res, {
            type: "response.output_item.added",
            output_index: reasoningOutputIndex,
            item: { id: reasoningItemId, type: "reasoning", status: "in_progress", summary: [] },
          });
        }
        writeResponsesEvent(res, {
          type: "response.reasoning_summary_text.delta",
          item_id: reasoningItemId,
          output_index: reasoningOutputIndex,
          summary_index: 0,
          delta: part.text,
        });
        thinkingText += part.text;
        return;
      }
      // Ordinary text: finish the reasoning block first so the client sees a
      // completed reasoning item before any content items.
      closeReasoningItem();
      if (messageOutputIndex === -1) {
        messageOutputIndex = nextOutputIndex++;
        messageItemId = makeCompatId("msg");
        writeResponsesEvent(res, {
          type: "response.output_item.added",
          output_index: messageOutputIndex,
          item: {
            id: messageItemId,
            type: "message",
            // The item is only completed by the matching output_item.done below.
            status: "in_progress",
            role: "assistant",
            content: [{ type: "output_text", text: "", annotations: [] }],
          },
        });
      }
      text += part.text;
      writeResponsesEvent(res, {
        type: "response.output_text.delta",
        item_id: messageItemId,
        output_index: messageOutputIndex,
        content_index: 0,
        delta: part.text,
      });
      return;
    }
    if (!isRecord(part.functionCall)) return;
    // A tool call also ends the reasoning block.
    closeReasoningItem();
    const fc = part.functionCall;
    const name = typeof fc.name === "string" ? fc.name : "unknown";
    const args = fc.args !== undefined ? JSON.stringify(fc.args) : "{}";
    const callId = `call_${Date.now().toString(36)}_${toolCallIndex++}`;
    if (typeof part.thoughtSignature === "string" && part.thoughtSignature) {
      cacheThoughtSignature(callId, part.thoughtSignature);
    }
    toolCalls.push({ id: callId, type: "function", function: { name, arguments: args } });
    const item = {
      id: makeCompatId("fc"),
      type: "function_call",
      call_id: callId,
      name,
      arguments: args,
      status: "completed",
    };
    const outputIndex = nextOutputIndex++;
    // The arguments arrive whole, so the full added/delta/done/done sequence is emitted at once.
    writeResponsesEvent(res, { type: "response.output_item.added", output_index: outputIndex, item });
    writeResponsesEvent(res, { type: "response.function_call_arguments.delta", item_id: item.id, output_index: outputIndex, delta: args });
    writeResponsesEvent(res, { type: "response.function_call_arguments.done", item_id: item.id, output_index: outputIndex, arguments: args });
    writeResponsesEvent(res, { type: "response.output_item.done", output_index: outputIndex, item });
  };

  // Handles one raw upstream line; anything that is not a JSON `data:` line is ignored.
  const handleLine = (rawLine: string): void => {
    const line = rawLine.trim();
    if (!line.startsWith("data:")) return;
    const payload = line.slice(5).trim();
    if (!payload || payload === "[DONE]") return;
    try {
      const parsed = JSON.parse(payload) as Record<string, unknown>;
      // The payload is either the response itself or an envelope with a `response` field.
      const response = isRecord(parsed.response) ? parsed.response : parsed;
      const candidates = Array.isArray(response.candidates) ? response.candidates : [];
      for (const candidate of candidates) {
        if (!isRecord(candidate) || !isRecord(candidate.content) || !Array.isArray(candidate.content.parts)) continue;
        for (const part of candidate.content.parts) {
          if (isRecord(part)) handlePart(part);
        }
      }
      const usage = isRecord(response.usageMetadata) ? response.usageMetadata : isRecord(response.usage) ? response.usage : null;
      if (usage) {
        if (typeof usage.promptTokenCount === "number") inputTokens = usage.promptTokenCount;
        if (typeof usage.candidatesTokenCount === "number") outputTokens = usage.candidatesTokenCount;
        if (typeof usage.input_tokens === "number") inputTokens = usage.input_tokens;
        if (typeof usage.output_tokens === "number") outputTokens = usage.output_tokens;
      }
    } catch {
      // Ignore malformed JSON chunks
    }
  };

  let tailBuffer = "";
  try {
    for await (const chunk of nodeStream) {
      if (reqClosed) {
        nodeStream.destroy();
        break;
      }
      tailBuffer += typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
      let newlineIdx: number;
      while ((newlineIdx = tailBuffer.indexOf("\n")) >= 0) {
        const line = tailBuffer.slice(0, newlineIdx);
        tailBuffer = tailBuffer.slice(newlineIdx + 1);
        handleLine(line);
      }
    }
    if (!reqClosed) {
      // Flush the decoder and process a final line that had no trailing newline.
      tailBuffer += decoder.decode();
      if (tailBuffer.trim()) handleLine(tailBuffer);
      tailBuffer = "";
    }
  } catch (err) {
    compatLogger.warn(`Responses stream read error: ${err}`);
  }

  const completion: CompatCompletion = {
    text,
    thinkingText: thinkingText || undefined,
    inputTokens,
    outputTokens,
    toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
  };
  if (!reqClosed && !res.writableEnded) {
    // Close the reasoning item if it was never closed mid-stream.
    closeReasoningItem();
    if (messageOutputIndex !== -1) {
      writeResponsesEvent(res, {
        type: "response.output_text.done",
        item_id: messageItemId,
        output_index: messageOutputIndex,
        content_index: 0,
        text,
      });
      writeResponsesEvent(res, {
        type: "response.output_item.done",
        output_index: messageOutputIndex,
        item: {
          id: messageItemId,
          type: "message",
          status: "completed",
          role: "assistant",
          content: [{ type: "output_text", text, annotations: [] }],
        },
      });
    }
    writeResponsesEvent(res, {
      type: "response.completed",
      response: buildResponsesResponse(request, responseId, createdAt, completion, "completed", previousResponseId),
    });
    res.end();
  }
  return completion;
}
1793
+
1794
/**
 * Runs a streaming Responses request through the account rotator.
 *
 * On success the upstream body is handed to `streamResponsesSse`, which writes
 * the events to `res`; any non-zero token usage is then recorded on the rotator.
 *
 * @param req Incoming client request (its headers are forwarded).
 * @param res Client response the stream is written to.
 * @param rotator Account rotator used for the upstream call.
 * @param request Original Responses request.
 * @param body Upstream request body.
 * @param responseId Id of the response being produced.
 * @param previousResponseId Id of the preceding response, or `null`.
 * @returns The completion with status 200 and `streamed: true`, or an empty
 *   completion with the failure status and error text and `streamed: false`.
 */
async function completeResponsesViaRotator(
  req: IncomingMessage,
  res: ServerResponse,
  rotator: AccountRotator,
  request: OpenAIResponsesRequest,
  body: RequestBody,
  responseId: string,
  previousResponseId: string | null,
): Promise<{ completion: CompatCompletion; status: number; errorText?: string; streamed: boolean }> {
  const createdAt = Math.floor(Date.now() / 1000);
  const rotation = await withRotation(rotator, body.model, flattenHeaders(req.headers), body,
    async (upstream) => {
      const streamedCompletion = await streamResponsesSse(upstream.body, req, res, request, responseId, previousResponseId, createdAt);
      const sawUsage = streamedCompletion.inputTokens > 0 || streamedCompletion.outputTokens > 0;
      if (sawUsage) {
        rotator.recordTokenUsage(body.displayModel || body.model, streamedCompletion.inputTokens, streamedCompletion.outputTokens);
      }
      return streamedCompletion;
    },
  );

  if (rotation.ok) {
    return { completion: rotation.result, status: 200, streamed: true };
  }

  // Append the retry hint to the error text when the rotator supplied one.
  let errorText = rotation.errorText;
  if (rotation.retryAfterMs) {
    errorText = `${rotation.errorText}; retryAfterMs=${rotation.retryAfterMs}`;
  }
  return {
    completion: { text: "", inputTokens: 0, outputTokens: 0 },
    status: rotation.status,
    errorText,
    streamed: false,
  };
}
1823
+
1151
1824
  async function completeViaRotator(
1152
1825
  req: IncomingMessage,
1153
1826
  res: ServerResponse,
@@ -1191,7 +1864,7 @@ async function completeViaRotator(
1191
1864
 
1192
1865
 
1193
1866
  const MODEL_CATALOG = [
1194
- { id: "gemini-3.5-flash-low", family: "gemini-3.5-flash", ctx: 1048576, quotaPool: "gemini-3.5-flash", multimodal: true, tools: true },
1867
+ { id: "gemini-3.5-flash-medium", family: "gemini-3.5-flash", ctx: 1048576, quotaPool: "gemini-3.5-flash", multimodal: true, tools: true },
1195
1868
  { id: "gemini-3.5-flash-high", family: "gemini-3.5-flash", ctx: 1048576, quotaPool: "gemini-3.5-flash", multimodal: true, tools: true },
1196
1869
  { id: "gemini-3-flash", family: "gemini-3.5-flash", ctx: 1048576, quotaPool: "gemini-3.5-flash", multimodal: true, tools: true },
1197
1870
  { id: "gemini-3.1-pro-low", family: "gemini-3.1-pro", ctx: 1048576, quotaPool: "gemini-3.1-pro", multimodal: true, tools: true },
@@ -1338,6 +2011,93 @@ export async function handleOpenAIChatCompletions(req: IncomingMessage, res: Ser
1338
2011
  });
1339
2012
  }
1340
2013
 
2014
/**
 * Handles creation of a response (the proxy routes `POST /v1/responses` here).
 *
 * Reads and validates the JSON body, converts it to a chat request and then to
 * the upstream body, and, unless `store` is `false`, records an `in_progress`
 * placeholder in the responses store. Streaming requests are written as SSE by
 * `completeResponsesViaRotator`; non-streaming requests are answered with a
 * single JSON response object. On upstream failure the placeholder is removed.
 *
 * @param req Incoming client request.
 * @param res Client response.
 * @param rotator Account rotator used for the upstream call.
 */
export async function handleOpenAIResponsesCreate(req: IncomingMessage, res: ServerResponse, rotator: AccountRotator): Promise<void> {
  let parsed: unknown;
  try {
    parsed = await readJsonBody(req);
  } catch (err) {
    if (err instanceof PayloadTooLargeError) return writeJson(res, 413, { error: { message: "Payload too large", type: "invalid_request_error" } });
    return writeJson(res, 400, { error: { message: "Invalid JSON body", type: "invalid_request_error" } });
  }

  const normalized = normalizeOpenAIResponsesRequest(parsed);
  const validation = validateOpenAIResponsesRequest(normalized);
  if (!validation.ok) return writeJson(res, 400, { error: { message: validation.errors.join("; "), type: "invalid_request_error" } });

  let converted: ResponsesConversionResult;
  try {
    converted = convertResponsesToChatRequest(validation.value);
  } catch (err) {
    return writeJson(res, 400, { error: { message: err instanceof Error ? err.message : String(err), type: "invalid_request_error" } });
  }

  const responseId = makeCompatId("resp");
  const createdAt = Math.floor(Date.now() / 1000);
  const requestBody = openAIToAntigravityBody(converted.chatRequest);
  requestBody.requestId = responseId;

  // Store a placeholder up front so the response id resolves while the request is in flight.
  if (validation.value.store !== false) {
    setStoredResponse(responseId, {
      response: buildResponsesResponse(validation.value, responseId, createdAt, { text: "", inputTokens: 0, outputTokens: 0, toolCalls: [] }, "in_progress", converted.previousResponseId),
      inputItems: converted.inputItems,
      conversationMessages: converted.conversationMessages,
      callIdToName: new Map(),
      expiresAt: Date.now() + RESPONSES_STORE_TTL_MS,
    });
  }

  if (validation.value.stream) {
    const result = await completeResponsesViaRotator(req, res, rotator, validation.value, requestBody, responseId, converted.previousResponseId);
    if (result.status !== 200) {
      responsesStore.delete(responseId);
      const message = result.errorText || "Upstream error";
      if (!res.headersSent) return writeJson(res, result.status, { error: { message, type: "upstream_error" } });
      // The SSE stream had already started, so a JSON error can no longer be sent.
      // Report the failure in-band and end the stream; otherwise the client waits forever.
      if (!res.writableEnded && !res.destroyed) {
        writeResponsesEvent(res, { type: "error", code: "upstream_error", message, param: null });
        res.end();
      }
      return;
    }
    if (validation.value.store !== false) {
      const responseObject = buildResponsesResponse(validation.value, responseId, createdAt, result.completion, "completed", converted.previousResponseId);
      saveResponsesEntry(responseObject, converted.inputItems, converted.conversationMessages, result.completion);
    }
    return;
  }

  const result = await completeViaRotator(req, res, rotator, requestBody, "none");
  if (result.status !== 200) {
    responsesStore.delete(responseId);
    return writeJson(res, result.status, { error: { message: result.errorText || "Upstream error", type: "upstream_error" } });
  }

  const responseObject = buildResponsesResponse(validation.value, responseId, createdAt, result.completion, "completed", converted.previousResponseId);
  if (validation.value.store !== false) {
    saveResponsesEntry(responseObject, converted.inputItems, converted.conversationMessages, result.completion);
  } else {
    responsesStore.delete(responseId);
  }
  writeJson(res, 200, responseObject);
}
2077
+
2078
/**
 * Writes the stored response object for `responseId`, or a 404 error payload
 * when the store has no entry for it.
 *
 * @param _req Unused.
 * @param res Client response.
 * @param responseId Id of the response to look up.
 */
export function handleOpenAIResponsesRetrieve(_req: IncomingMessage, res: ServerResponse, responseId: string): void {
  const stored = getStoredResponse(responseId);
  if (stored) {
    writeJson(res, 200, stored.response);
    return;
  }
  writeJson(res, 404, { error: { message: `Response not found: ${responseId}`, type: "invalid_request_error" } });
}
2083
+
2084
/**
 * Removes `responseId` from the responses store and always answers 200;
 * the `deleted` field reports whether an entry was actually removed.
 *
 * @param _req Unused.
 * @param res Client response.
 * @param responseId Id of the response to delete.
 */
export function handleOpenAIResponsesDelete(_req: IncomingMessage, res: ServerResponse, responseId: string): void {
  const deleted = responsesStore.delete(responseId);
  writeJson(res, 200, { id: responseId, object: "response.deleted", deleted });
}
2087
+
2088
/**
 * Marks a stored response as cancelled if it is still `in_progress`, then
 * writes the stored response object. Answers 404 when the id is unknown.
 * Only the stored status is changed here.
 *
 * @param _req Unused.
 * @param res Client response.
 * @param responseId Id of the response to cancel.
 */
export function handleOpenAIResponsesCancel(_req: IncomingMessage, res: ServerResponse, responseId: string): void {
  const stored = getStoredResponse(responseId);
  if (!stored) {
    writeJson(res, 404, { error: { message: `Response not found: ${responseId}`, type: "invalid_request_error" } });
    return;
  }
  const stillRunning = stored.response.status === "in_progress";
  if (stillRunning) {
    stored.response.status = "cancelled";
  }
  writeJson(res, 200, stored.response);
}
2094
+
2095
/**
 * Writes the input items recorded for `responseId` as a single-page list
 * (`has_more` is always `false`), or a 404 error payload when the id is unknown.
 *
 * @param _req Unused.
 * @param res Client response.
 * @param responseId Id of the response whose input items are listed.
 */
export function handleOpenAIResponsesInputItems(_req: IncomingMessage, res: ServerResponse, responseId: string): void {
  const stored = getStoredResponse(responseId);
  if (!stored) {
    writeJson(res, 404, { error: { message: `Response not found: ${responseId}`, type: "invalid_request_error" } });
    return;
  }
  const items = stored.inputItems;
  // Items without an id (or an empty list) yield null for the cursor fields.
  const firstId = items[0]?.id ?? null;
  const lastId = items.at(-1)?.id ?? null;
  writeJson(res, 200, { object: "list", data: items, has_more: false, first_id: firstId, last_id: lastId });
}
2100
+
1341
2101
  export async function handleAnthropicMessages(req: IncomingMessage, res: ServerResponse, rotator: AccountRotator): Promise<void> {
1342
2102
  let parsed: unknown;
1343
2103
  try {
package/src/dashboard.ts CHANGED
@@ -2056,7 +2056,8 @@ var TOKEN_MODEL_COLORS = {
2056
2056
  'gemini-3.1-pro-high': '#3b82f6', // Azul
2057
2057
  'gemini-3.1-pro-low': '#38bdf8', // Celeste
2058
2058
  'gemini-3-flash': '#4ade80', // Verde
2059
- 'gemini-3.5-flash-low': '#a3e635', // Lime
2059
+ 'gemini-3.5-flash-low': '#a3e635', // Lime (legacy alias)
2060
+ 'gemini-3.5-flash-medium': '#a3e635', // Lime
2060
2061
  'gemini-3.5-flash-high': '#84cc16', // Darker Lime
2061
2062
  'gemini-3.5-flash': '#84cc16',
2062
2063
  'gemini-3.1-pro': '#fb923c', // Fallback genérico
@@ -2084,6 +2085,7 @@ var MODEL_PRICING_CLIENT = {
2084
2085
  'gemini-3-flash': { input: 0.50, output: 3.00 },
2085
2086
  'gemini-3.5-flash': { input: 0.50, output: 3.00 },
2086
2087
  'gemini-3.5-flash-low': { input: 0.50, output: 3.00 },
2088
+ 'gemini-3.5-flash-medium': { input: 0.50, output: 3.00 },
2087
2089
  'gemini-3.5-flash-high': { input: 0.50, output: 3.00 },
2088
2090
  'gpt-oss-120b-medium': { input: 2.00, output: 10.00 },
2089
2091
  };
package/src/proxy.ts CHANGED
@@ -36,7 +36,18 @@ import { trackFeature, reportFlagEvent, FLAG_PATTERNS, type FlagPattern } from "
36
36
  import type { FlagEventData } from "./telemetry.js";
37
37
  import { startVersionChecker, performSelfUpdate } from "./version-check.js";
38
38
  import { startNotificationPoller } from "./notification-poller.js";
39
- import { handleAnthropicMessages, handleGeminiGenerateContent, serveGeminiModels, handleOpenAIChatCompletions, serveOpenAIModels } from "./compat.js";
39
+ import {
40
+ handleAnthropicMessages,
41
+ handleGeminiGenerateContent,
42
+ handleOpenAIChatCompletions,
43
+ handleOpenAIResponsesCancel,
44
+ handleOpenAIResponsesCreate,
45
+ handleOpenAIResponsesDelete,
46
+ handleOpenAIResponsesInputItems,
47
+ handleOpenAIResponsesRetrieve,
48
+ serveGeminiModels,
49
+ serveOpenAIModels,
50
+ } from "./compat.js";
40
51
  import { applyConfigDefaults } from "./account-store.js";
41
52
  import { classifyRateLimitReason, parseRetryAfterMs } from "./rate-limit-parser.js";
42
53
 
@@ -243,7 +254,7 @@ export async function forwardRequest(
243
254
  // Map internal display/compat names to Google upstream names
244
255
  let targetModel = body.model;
245
256
  if (targetModel === "gemini-3.1-pro-high") targetModel = "gemini-pro-agent";
246
- if (targetModel === "gemini-3.5-flash-high" || targetModel === "gemini-3.5-flash") targetModel = "gemini-3-flash-agent";
257
+ if (targetModel === "gemini-3.5-flash-high" || targetModel === "gemini-3.5-flash" || targetModel === "gemini-3.5-flash-medium") targetModel = "gemini-3-flash-agent";
247
258
  if (targetModel === "gpt-oss-120b") targetModel = "gpt-oss-120b-medium";
248
259
  body.model = targetModel;
249
260
 
@@ -1106,16 +1117,35 @@ export function startProxy(rotator: AccountRotator, port: number, bindHost = "0.
1106
1117
  }
1107
1118
 
1108
1119
  if (method === "POST" && pathname === "/v1/chat/completions") {
1109
- handleOpenAIChatCompletions(req, res, rotator).catch((err) => {
1110
- log(`OpenAI compat error: ${err}`, rotator, "error");
1111
- if (!res.headersSent) res.writeHead(500, { "Content-Type": "application/json" });
1112
- res.end(JSON.stringify({ error: { message: "Internal OpenAI compat error", type: "server_error" } }));
1113
- });
1114
- return;
1115
- }
1120
+ handleOpenAIChatCompletions(req, res, rotator).catch((err) => {
1121
+ log(`OpenAI compat error: ${err}`, rotator, "error");
1122
+ if (!res.headersSent) res.writeHead(500, { "Content-Type": "application/json" });
1123
+ res.end(JSON.stringify({ error: { message: "Internal OpenAI compat error", type: "server_error" } }));
1124
+ });
1125
+ return;
1126
+ }
1127
+
1128
+ if (method === "POST" && pathname === "/v1/responses") {
1129
+ handleOpenAIResponsesCreate(req, res, rotator).catch((err) => {
1130
+ log(`OpenAI responses compat error: ${err}`, rotator, "error");
1131
+ if (!res.headersSent) res.writeHead(500, { "Content-Type": "application/json" });
1132
+ res.end(JSON.stringify({ error: { message: "Internal OpenAI responses compat error", type: "server_error" } }));
1133
+ });
1134
+ return;
1135
+ }
1136
+
1137
+ const responseMatch = pathname.match(/^\/v1\/responses\/([^/]+)(?:\/(cancel|input_items))?$/);
1138
+ if (responseMatch) {
1139
+ const responseId = decodeURIComponent(responseMatch[1]);
1140
+ const action = responseMatch[2] || "";
1141
+ if (method === "GET" && !action) return handleOpenAIResponsesRetrieve(req, res, responseId);
1142
+ if (method === "DELETE" && !action) return handleOpenAIResponsesDelete(req, res, responseId);
1143
+ if (method === "POST" && action === "cancel") return handleOpenAIResponsesCancel(req, res, responseId);
1144
+ if (method === "GET" && action === "input_items") return handleOpenAIResponsesInputItems(req, res, responseId);
1145
+ }
1116
1146
 
1117
- // Anthropic-compatible adapter route (additive; does not affect native v1internal route)
1118
- if (method === "POST" && pathname === "/v1/messages") {
1147
+ // Anthropic-compatible adapter route (additive; does not affect native v1internal route)
1148
+ if (method === "POST" && pathname === "/v1/messages") {
1119
1149
  handleAnthropicMessages(req, res, rotator).catch((err) => {
1120
1150
  log(`Anthropic compat error: ${err}`, rotator, "error");
1121
1151
  if (!res.headersSent) res.writeHead(500, { "Content-Type": "application/json" });
package/src/rotator.ts CHANGED
@@ -766,7 +766,7 @@ export class AccountRotator {
766
766
  ): number {
767
767
  const timerScore = (4 - priority) * 35;
768
768
  const quotaScore = Math.max(0, quota) * 0.7;
769
- const tierScore = Math.max(0, 3 - tier) * 18;
769
+ const tierScore = Math.max(0, 4 - tier) * 13.5;
770
770
  const healthScore = Math.max(0, Math.min(1, health)) * 25;
771
771
  const tokenScore = Math.max(0, Math.min(1, tokenRatio)) * 20;
772
772
  const lruScore = Math.max(0, 10 - distance);
@@ -1238,8 +1238,9 @@ export class AccountRotator {
1238
1238
  const tier = account.config.tier || "unknown";
1239
1239
  if (tier === "ultra") return 0;
1240
1240
  if (tier === "pro") return 1;
1241
- if (tier === "free") return 2;
1242
- return 3;
1241
+ if (tier === "plus") return 2;
1242
+ if (tier === "free") return 3;
1243
+ return 4;
1243
1244
  }
1244
1245
 
1245
1246
  private refreshHealthScores(): void {
package/src/types.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  // Account types and configuration
2
2
 
3
3
  export type AccountType = "pro" | "free";
4
- export type AccountTier = "ultra" | "pro" | "free" | "unknown";
4
+ export type AccountTier = "ultra" | "pro" | "plus" | "free" | "unknown";
5
5
  export type RoutingPolicy = "timer-first" | "tier-first" | "quota-first" | "hybrid";
6
6
 
7
7
  export type RoutingRejectionReason =
@@ -159,9 +159,9 @@ export function resolveDisplayModelKey(requestModel: string): string {
159
159
  if (lower.includes("-high")) return "gemini-3.1-pro-high";
160
160
  return "gemini-3.1-pro"; // unspecified variant
161
161
  }
162
- // Gemini 3.5 Flash — distinguish low vs high
162
+ // Gemini 3.5 Flash — distinguish medium vs high
163
163
  if (lower.includes("gemini") && lower.includes("3.5") && lower.includes("flash")) {
164
- if (lower.includes("-low") || lower.includes("-medium")) return "gemini-3.5-flash-low";
164
+ if (lower.includes("-low") || lower.includes("-medium")) return "gemini-3.5-flash-medium";
165
165
  if (lower.includes("-high")) return "gemini-3.5-flash-high";
166
166
  return "gemini-3.5-flash"; // unspecified variant
167
167
  }
@@ -440,6 +440,7 @@ export const MODEL_PRICING: Record<
440
440
  "gemini-3-flash": { inputPer1M: 0.50, outputPer1M: 3.00 },
441
441
  "gemini-3.5-flash": { inputPer1M: 1.50, outputPer1M: 9.00, cachingPer1M: 0.15, cachingStoragePer1MPerHour: 1.00 },
442
442
  "gemini-3.5-flash-low": { inputPer1M: 1.50, outputPer1M: 9.00, cachingPer1M: 0.15, cachingStoragePer1MPerHour: 1.00 },
443
+ "gemini-3.5-flash-medium": { inputPer1M: 1.50, outputPer1M: 9.00, cachingPer1M: 0.15, cachingStoragePer1MPerHour: 1.00 },
443
444
  "gemini-3.5-flash-high": { inputPer1M: 1.50, outputPer1M: 9.00, cachingPer1M: 0.15, cachingStoragePer1MPerHour: 1.00 },
444
445
  "gpt-oss-120b-medium": { inputPer1M: 2.00, outputPer1M: 10.00 },
445
446
  };
package/src/validators.ts CHANGED
@@ -39,8 +39,8 @@ export function validateAccountConfig(value: unknown, path = "account"): Validat
39
39
  if (!isNonEmptyString(value.projectId)) errors.push(`${path}.projectId must be a non-empty string`);
40
40
  if (value.label !== undefined && typeof value.label !== "string") errors.push(`${path}.label must be a string`);
41
41
  if (value.type !== undefined && value.type !== "pro" && value.type !== "free") errors.push(`${path}.type must be "pro" or "free"`);
42
- if (value.tier !== undefined && !["ultra", "pro", "free", "unknown"].includes(String(value.tier))) {
43
- errors.push(`${path}.tier must be "ultra", "pro", "free", or "unknown"`);
42
+ if (value.tier !== undefined && !["ultra", "pro", "plus", "free", "unknown"].includes(String(value.tier))) {
43
+ errors.push(`${path}.tier must be "ultra", "pro", "plus", "free", or "unknown"`);
44
44
  }
45
45
  if (value.familyManager !== undefined && typeof value.familyManager !== "boolean") errors.push(`${path}.familyManager must be a boolean`);
46
46