@draht/ai 2026.3.6 → 2026.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/README.md +14 -10
  2. package/dist/cli.js +0 -0
  3. package/dist/env-api-keys.d.ts.map +1 -1
  4. package/dist/env-api-keys.js +5 -2
  5. package/dist/env-api-keys.js.map +1 -1
  6. package/dist/index.d.ts +1 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +1 -0
  9. package/dist/index.js.map +1 -1
  10. package/dist/models.d.ts +1 -1
  11. package/dist/models.d.ts.map +1 -1
  12. package/dist/models.generated.d.ts +497 -259
  13. package/dist/models.generated.d.ts.map +1 -1
  14. package/dist/models.generated.js +683 -445
  15. package/dist/models.generated.js.map +1 -1
  16. package/dist/models.js +2 -2
  17. package/dist/models.js.map +1 -1
  18. package/dist/providers/amazon-bedrock.d.ts.map +1 -1
  19. package/dist/providers/amazon-bedrock.js +5 -2
  20. package/dist/providers/amazon-bedrock.js.map +1 -1
  21. package/dist/providers/anthropic.d.ts.map +1 -1
  22. package/dist/providers/anthropic.js +5 -2
  23. package/dist/providers/anthropic.js.map +1 -1
  24. package/dist/providers/azure-openai-responses.d.ts.map +1 -1
  25. package/dist/providers/azure-openai-responses.js +5 -2
  26. package/dist/providers/azure-openai-responses.js.map +1 -1
  27. package/dist/providers/google-gemini-cli.d.ts.map +1 -1
  28. package/dist/providers/google-gemini-cli.js +30 -17
  29. package/dist/providers/google-gemini-cli.js.map +1 -1
  30. package/dist/providers/google-shared.d.ts.map +1 -1
  31. package/dist/providers/google-shared.js +16 -22
  32. package/dist/providers/google-shared.js.map +1 -1
  33. package/dist/providers/google-vertex.d.ts.map +1 -1
  34. package/dist/providers/google-vertex.js +26 -5
  35. package/dist/providers/google-vertex.js.map +1 -1
  36. package/dist/providers/google.d.ts.map +1 -1
  37. package/dist/providers/google.js +5 -2
  38. package/dist/providers/google.js.map +1 -1
  39. package/dist/providers/mistral.d.ts +22 -0
  40. package/dist/providers/mistral.d.ts.map +1 -0
  41. package/dist/providers/mistral.js +498 -0
  42. package/dist/providers/mistral.js.map +1 -0
  43. package/dist/providers/openai-codex-responses.d.ts.map +1 -1
  44. package/dist/providers/openai-codex-responses.js +6 -3
  45. package/dist/providers/openai-codex-responses.js.map +1 -1
  46. package/dist/providers/openai-completions.d.ts.map +1 -1
  47. package/dist/providers/openai-completions.js +44 -67
  48. package/dist/providers/openai-completions.js.map +1 -1
  49. package/dist/providers/openai-responses-shared.d.ts.map +1 -1
  50. package/dist/providers/openai-responses-shared.js +36 -14
  51. package/dist/providers/openai-responses-shared.js.map +1 -1
  52. package/dist/providers/openai-responses.d.ts.map +1 -1
  53. package/dist/providers/openai-responses.js +5 -2
  54. package/dist/providers/openai-responses.js.map +1 -1
  55. package/dist/providers/register-builtins.d.ts +3 -2
  56. package/dist/providers/register-builtins.d.ts.map +1 -1
  57. package/dist/providers/register-builtins.js +6 -0
  58. package/dist/providers/register-builtins.js.map +1 -1
  59. package/dist/types.d.ts +9 -5
  60. package/dist/types.d.ts.map +1 -1
  61. package/dist/types.js.map +1 -1
  62. package/dist/utils/hash.d.ts +3 -0
  63. package/dist/utils/hash.d.ts.map +1 -0
  64. package/dist/utils/hash.js +14 -0
  65. package/dist/utils/hash.js.map +1 -0
  66. package/dist/utils/oauth/github-copilot.d.ts.map +1 -1
  67. package/dist/utils/oauth/github-copilot.js +23 -12
  68. package/dist/utils/oauth/github-copilot.js.map +1 -1
  69. package/dist/utils/overflow.d.ts +1 -1
  70. package/dist/utils/overflow.d.ts.map +1 -1
  71. package/dist/utils/overflow.js +5 -3
  72. package/dist/utils/overflow.js.map +1 -1
  73. package/package.json +6 -7
  74. package/oauth.d.ts +0 -1
  75. package/oauth.js +0 -1
@@ -1 +1 @@
1
- {"version":3,"file":"azure-openai-responses.js","sourceRoot":"","sources":["../../src/providers/azure-openai-responses.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,QAAQ,CAAC;AAErC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAU7C,OAAO,EAAE,2BAA2B,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC;AACvH,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAEvE,MAAM,yBAAyB,GAAG,IAAI,CAAC;AACvC,MAAM,yBAAyB,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,wBAAwB,CAAC,CAAC,CAAC;AAE5G,SAAS,sBAAsB,CAAC,KAAyB,EAAuB;IAC/E,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,IAAI,CAAC,KAAK;QAAE,OAAO,GAAG,CAAC;IACvB,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,MAAM,CAAC,OAAO,EAAE,cAAc,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACxD,IAAI,CAAC,OAAO,IAAI,CAAC,cAAc;YAAE,SAAS;QAC1C,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,GAAG,CAAC;AAAA,CACX;AAED,SAAS,qBAAqB,CAAC,KAAsC,EAAE,OAAqC,EAAU;IACrH,IAAI,OAAO,EAAE,mBAAmB,EAAE,CAAC;QAClC,OAAO,OAAO,CAAC,mBAAmB,CAAC;IACpC,CAAC;IACD,MAAM,gBAAgB,GAAG,sBAAsB,CAAC,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5G,OAAO,gBAAgB,IAAI,KAAK,CAAC,EAAE,CAAC;AAAA,CACpC;AAYD;;GAEG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAA0E,CAChH,KAAsC,EACtC,OAAgB,EAChB,OAAqC,EACP,EAAE,CAAC;IACjC,MAAM,MAAM,GAAG,IAAI,2BAA2B,EAAE,CAAC;IAEjD,yBAAyB;IACzB,CAAC,KAAK,IAAI,EAAE,CAAC;QACZ,MAAM,cAAc,GAAG,qBAAqB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAE7D,MAAM,MAAM,GAAqB;YAChC,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,EAAE;YACX,GAAG,EAAE,wBAA+B;YACpC,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,KAAK,EAAE,KAAK,CAAC,EAAE;YACf,KAAK,EAAE;gBACN,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;gBACT,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;aACpE;YACD,UAAU,EAAE,MAAM;YAClB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACrB,CAAC;QAEF,IAAI,CAAC;YACJ,6BAA6B;YAC7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,YAAY,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrE,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;YACpD,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC;YACpE,OAAO,EAAE,SAAS,EAAE,CAAC,MAAM,CAAC,CAAC;YAC7B,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,MAAM,CACjD,MAAM,EACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,CACxD,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;YAEhD,MAAM,sBAAsB,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YAElE,IAAI,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACxC,CAAC;YAED,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,KAAK,OAAO,EAAE,CAAC;gBACtE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC9C,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;YAC1E,MAAM,CAAC,GAAG,EAAE,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO;gBAAE,OAAQ,KAA4B,CAAC,KAAK,CAAC;YAC/E,MAAM,CAAC,UAAU,GAAG,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC;YACnE,MAAM,CAAC,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;YACrF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YACzE,MAAM,CAAC,GAAG,EAAE,CAAC;QACd,CAAC;IAAA,CACD,CAAC,EAAE,CAAC;IAEL,OAAO,MAAM,CAAC;AAAA,CACd,CAAC;AAEF,MAAM,CAAC,MAAM,gCAAgC,GAAkE,CAC9G,KAAsC,EACtC,OAAgB,EAChB,OAA6B,EACC,EAAE,CAAC;IACjC,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,YAAY,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC/D,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,MAAM,IAAI,GAAG,gBAAgB,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;IACtD,MAAM,eAAe,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAEvG,OAAO,0BAA0B,CAAC,KAAK,EAAE,OAAO,EAAE;QACjD,GAAG,IAAI;QACP,eAAe;KACuB,CAAC,CAAC;AAAA,CACzC,CAAC;AAEF,SAAS,qBAAqB,CAAC,OAAe,EAAU;IACvD,OAAO,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AAAA,CACnC;AAED,SAAS,mBAAmB,CAAC,YAAoB,EAAU;IAC1D,OAAO,WAAW,YAAY,6BAA6B,CAAC;AAAA,CAC5D;AAED,SAAS,kBAAkB,CAC1B,KAAsC,EACtC,OAAqC,EACK;IAC1C,MAAM,UAAU,GAAG,OAAO,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,yBAAyB,CAAC;IAEjH,MAAM,OAAO,GAAG,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;IACxG,MAAM,YAAY,GAAG,OAAO,EAAE,iBAAiB,IAAI,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC;IAE1F,IAAI,eAAe,GAAG,OAAO,CAAC;IAE9B,IAAI,CAAC,eAAe,IAAI,YAAY,EAAE,CAAC;QACtC,eAAe,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACvC,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACd,wJAAwJ,CACxJ,CAAC;IACH,CAAC;IAED,OAAO;QACN,OAAO,EAAE,qBAAqB,CAAC,eAAe,CAAC;QAC/C,UAAU;KACV,CAAC;AAAA,CACF;AAED,SAAS,YAAY,CAAC,KAAsC,EAAE,MAAc,EAAE,OAAqC,EAAE;IACpH,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC;YACvC,MAAM,IAAI,KAAK,CACd,4GAA4G,CAC5G,CAAC;QACH,CAAC;QACD,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IAC3C,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;IAErC,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;QACtB,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,kBAAkB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAEnE,OAAO,IAAI,WAAW,CAAC;QACtB,MAAM;QACN,UAAU;QACV,uBAAuB,EAAE,IAAI;QAC7B,cAAc,EAAE,OAAO;QACvB,OAAO,EAAE,OAAO;KAChB,CAAC,CAAC;AAAA,CACH;AAED,SAAS,WAAW,CACnB,KAAsC,EACtC,OAAgB,EAChB,OAAgD,EAChD,cAAsB,EACrB;IACD,MAAM,QAAQ,GAAG,wBAAwB,CAAC,KAAK,EAAE,OAAO,EAAE,yBAAyB,CAAC,CAAC;IAErF,MAAM,MAAM,GAAkC;QAC7C,KAAK,EAAE,cAAc;QACrB,KAAK,EAAE,QAAQ;QACf,MAAM,EAAE,IAAI;QACZ,gBAAgB,EAAE,OAAO,EAAE,SAAS;KACpC,CAAC;IAEF,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;QACxB,MAAM,CAAC,iBAAiB,GAAG,OAAO,EAAE,SAAS,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,EAAE,WAAW,KAAK,SAAS,EAAE,CAAC;QACxC,MAAM,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,CAAC;IAC3C,CAAC;IAED,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACnB,MAAM,CAAC,KAAK,GAAG,qBAAqB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACrB,IAAI,OAAO,EAAE,eAAe,IAAI,OAAO,EAAE,gBAAgB,EAAE,CAAC;YAC3D,MAAM,CAAC,SAAS,GAAG;gBAClB,MAAM,EAAE,OAAO,EAAE,eAAe,IAAI,QAAQ;gBAC5C,OAAO,EAAE,OAAO,EAAE,gBAAgB,IAAI,MAAM;aAC5C,CAAC;YACF,MAAM,CAAC,OAAO,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACP,IAAI,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,mGAAmG;gBACnG,QAAQ,CAAC,IAAI,CAAC;oBACb,IAAI,EAAE,WAAW;oBACjB,OAAO,EAAE;wBACR;4BACC,IAAI,EAAE,YAAY;4BAClB,IAAI,EAAE,uBAAuB;yBAC7B;qBACD;iBACD,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AAAA,CACd","sourcesContent":["import { AzureOpenAI } from \"openai\";\nimport type { ResponseCreateParamsStreaming } from \"openai/resources/responses/responses.js\";\nimport { getEnvApiKey } from \"../env-api-keys.js\";\nimport { supportsXhigh } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStreamFunction,\n\tStreamOptions,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { convertResponsesMessages, convertResponsesTools, processResponsesStream } from \"./openai-responses-shared.js\";\nimport { buildBaseOptions, clampReasoning } from \"./simple-options.js\";\n\nconst DEFAULT_AZURE_API_VERSION = \"v1\";\nconst AZURE_TOOL_CALL_PROVIDERS = new Set([\"openai\", \"openai-codex\", \"opencode\", \"azure-openai-responses\"]);\n\nfunction parseDeploymentNameMap(value: string | undefined): Map<string, string> {\n\tconst map = new Map<string, string>();\n\tif (!value) return map;\n\tfor (const entry of value.split(\",\")) {\n\t\tconst trimmed = entry.trim();\n\t\tif (!trimmed) continue;\n\t\tconst [modelId, deploymentName] = trimmed.split(\"=\", 2);\n\t\tif (!modelId || !deploymentName) continue;\n\t\tmap.set(modelId.trim(), deploymentName.trim());\n\t}\n\treturn map;\n}\n\nfunction resolveDeploymentName(model: Model<\"azure-openai-responses\">, options?: AzureOpenAIResponsesOptions): string {\n\tif (options?.azureDeploymentName) {\n\t\treturn options.azureDeploymentName;\n\t}\n\tconst mappedDeployment = parseDeploymentNameMap(process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP).get(model.id);\n\treturn mappedDeployment || model.id;\n}\n\n// Azure OpenAI Responses-specific options\nexport interface AzureOpenAIResponsesOptions extends StreamOptions {\n\treasoningEffort?: \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\";\n\treasoningSummary?: \"auto\" | \"detailed\" | \"concise\" | null;\n\tazureApiVersion?: string;\n\tazureResourceName?: string;\n\tazureBaseUrl?: string;\n\tazureDeploymentName?: string;\n}\n\n/**\n * Generate function for Azure OpenAI Responses API\n */\nexport const streamAzureOpenAIResponses: StreamFunction<\"azure-openai-responses\", AzureOpenAIResponsesOptions> = (\n\tmodel: Model<\"azure-openai-responses\">,\n\tcontext: Context,\n\toptions?: AzureOpenAIResponsesOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t// Start async processing\n\t(async () => {\n\t\tconst deploymentName = resolveDeploymentName(model, options);\n\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"azure-openai-responses\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\t// Create Azure OpenAI client\n\t\t\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider) || \"\";\n\t\t\tconst client = createClient(model, apiKey, options);\n\t\t\tconst params = buildParams(model, context, options, deploymentName);\n\t\t\toptions?.onPayload?.(params);\n\t\t\tconst openaiStream = await client.responses.create(\n\t\t\t\tparams,\n\t\t\t\toptions?.signal ? { signal: options.signal } : undefined,\n\t\t\t);\n\t\t\tstream.push({ type: \"start\", partial: output });\n\n\t\t\tawait processResponsesStream(openaiStream, output, stream, model);\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) delete (block as { index?: number }).index;\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleAzureOpenAIResponses: StreamFunction<\"azure-openai-responses\", SimpleStreamOptions> = (\n\tmodel: Model<\"azure-openai-responses\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider);\n\tif (!apiKey) {\n\t\tthrow new Error(`No API key for provider: ${model.provider}`);\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tconst reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);\n\n\treturn streamAzureOpenAIResponses(model, context, {\n\t\t...base,\n\t\treasoningEffort,\n\t} satisfies AzureOpenAIResponsesOptions);\n};\n\nfunction normalizeAzureBaseUrl(baseUrl: string): string {\n\treturn baseUrl.replace(/\\/+$/, \"\");\n}\n\nfunction buildDefaultBaseUrl(resourceName: string): string {\n\treturn `https://${resourceName}.openai.azure.com/openai/v1`;\n}\n\nfunction resolveAzureConfig(\n\tmodel: Model<\"azure-openai-responses\">,\n\toptions?: AzureOpenAIResponsesOptions,\n): { baseUrl: string; apiVersion: string } {\n\tconst apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;\n\n\tconst baseUrl = options?.azureBaseUrl?.trim() || process.env.AZURE_OPENAI_BASE_URL?.trim() || undefined;\n\tconst resourceName = options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;\n\n\tlet resolvedBaseUrl = baseUrl;\n\n\tif (!resolvedBaseUrl && resourceName) {\n\t\tresolvedBaseUrl = buildDefaultBaseUrl(resourceName);\n\t}\n\n\tif (!resolvedBaseUrl && model.baseUrl) {\n\t\tresolvedBaseUrl = model.baseUrl;\n\t}\n\n\tif (!resolvedBaseUrl) {\n\t\tthrow new Error(\n\t\t\t\"Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.\",\n\t\t);\n\t}\n\n\treturn {\n\t\tbaseUrl: normalizeAzureBaseUrl(resolvedBaseUrl),\n\t\tapiVersion,\n\t};\n}\n\nfunction createClient(model: Model<\"azure-openai-responses\">, apiKey: string, options?: AzureOpenAIResponsesOptions) {\n\tif (!apiKey) {\n\t\tif (!process.env.AZURE_OPENAI_API_KEY) {\n\t\t\tthrow new Error(\n\t\t\t\t\"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.\",\n\t\t\t);\n\t\t}\n\t\tapiKey = process.env.AZURE_OPENAI_API_KEY;\n\t}\n\n\tconst headers = { ...model.headers };\n\n\tif (options?.headers) {\n\t\tObject.assign(headers, options.headers);\n\t}\n\n\tconst { baseUrl, apiVersion } = resolveAzureConfig(model, options);\n\n\treturn new AzureOpenAI({\n\t\tapiKey,\n\t\tapiVersion,\n\t\tdangerouslyAllowBrowser: true,\n\t\tdefaultHeaders: headers,\n\t\tbaseURL: baseUrl,\n\t});\n}\n\nfunction buildParams(\n\tmodel: Model<\"azure-openai-responses\">,\n\tcontext: Context,\n\toptions: AzureOpenAIResponsesOptions | undefined,\n\tdeploymentName: string,\n) {\n\tconst messages = convertResponsesMessages(model, context, AZURE_TOOL_CALL_PROVIDERS);\n\n\tconst params: ResponseCreateParamsStreaming = {\n\t\tmodel: deploymentName,\n\t\tinput: messages,\n\t\tstream: true,\n\t\tprompt_cache_key: options?.sessionId,\n\t};\n\n\tif (options?.maxTokens) {\n\t\tparams.max_output_tokens = options?.maxTokens;\n\t}\n\n\tif (options?.temperature !== undefined) {\n\t\tparams.temperature = options?.temperature;\n\t}\n\n\tif (context.tools) {\n\t\tparams.tools = convertResponsesTools(context.tools);\n\t}\n\n\tif (model.reasoning) {\n\t\tif (options?.reasoningEffort || options?.reasoningSummary) {\n\t\t\tparams.reasoning = {\n\t\t\t\teffort: options?.reasoningEffort || \"medium\",\n\t\t\t\tsummary: options?.reasoningSummary || \"auto\",\n\t\t\t};\n\t\t\tparams.include = [\"reasoning.encrypted_content\"];\n\t\t} else {\n\t\t\tif (model.name.toLowerCase().startsWith(\"gpt-5\")) {\n\t\t\t\t// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7\n\t\t\t\tmessages.push({\n\t\t\t\t\trole: \"developer\",\n\t\t\t\t\tcontent: [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\ttype: \"input_text\",\n\t\t\t\t\t\t\ttext: \"# Juice: 0 !important\",\n\t\t\t\t\t\t},\n\t\t\t\t\t],\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\t}\n\n\treturn params;\n}\n"]}
1
+ {"version":3,"file":"azure-openai-responses.js","sourceRoot":"","sources":["../../src/providers/azure-openai-responses.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,QAAQ,CAAC;AAErC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAU7C,OAAO,EAAE,2BAA2B,EAAE,MAAM,0BAA0B,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC;AACvH,OAAO,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAEvE,MAAM,yBAAyB,GAAG,IAAI,CAAC;AACvC,MAAM,yBAAyB,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,cAAc,EAAE,UAAU,EAAE,wBAAwB,CAAC,CAAC,CAAC;AAE5G,SAAS,sBAAsB,CAAC,KAAyB,EAAuB;IAC/E,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,IAAI,CAAC,KAAK;QAAE,OAAO,GAAG,CAAC;IACvB,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO;YAAE,SAAS;QACvB,MAAM,CAAC,OAAO,EAAE,cAAc,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACxD,IAAI,CAAC,OAAO,IAAI,CAAC,cAAc;YAAE,SAAS;QAC1C,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,GAAG,CAAC;AAAA,CACX;AAED,SAAS,qBAAqB,CAAC,KAAsC,EAAE,OAAqC,EAAU;IACrH,IAAI,OAAO,EAAE,mBAAmB,EAAE,CAAC;QAClC,OAAO,OAAO,CAAC,mBAAmB,CAAC;IACpC,CAAC;IACD,MAAM,gBAAgB,GAAG,sBAAsB,CAAC,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5G,OAAO,gBAAgB,IAAI,KAAK,CAAC,EAAE,CAAC;AAAA,CACpC;AAYD;;GAEG;AACH,MAAM,CAAC,MAAM,0BAA0B,GAA0E,CAChH,KAAsC,EACtC,OAAgB,EAChB,OAAqC,EACP,EAAE,CAAC;IACjC,MAAM,MAAM,GAAG,IAAI,2BAA2B,EAAE,CAAC;IAEjD,yBAAyB;IACzB,CAAC,KAAK,IAAI,EAAE,CAAC;QACZ,MAAM,cAAc,GAAG,qBAAqB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAE7D,MAAM,MAAM,GAAqB;YAChC,IAAI,EAAE,WAAW;YACjB,OAAO,EAAE,EAAE;YACX,GAAG,EAAE,wBAA+B;YACpC,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,KAAK,EAAE,KAAK,CAAC,EAAE;YACf,KAAK,EAAE;gBACN,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;gBACT,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;aACpE;YACD,UAAU,EAAE,MAAM;YAClB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACrB,CAAC;QAEF,IAAI,CAAC;YACJ,6BAA6B;YAC7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,YAAY,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrE,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;YACpD,IAAI,MAAM,GAAG,WAAW,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC;YAClE,MAAM,UAAU,GAAG,MAAM,OAAO,EAAE,SAAS,EAAE,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;YAC7D,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;gBAC9B,MAAM,GAAG,UAA2C,CAAC;YACtD,CAAC;YACD,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,MAAM,CACjD,MAAM,EACN,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,SAAS,CACxD,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;YAEhD,MAAM,sBAAsB,CAAC,YAAY,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YAElE,IAAI,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACxC,CAAC;YAED,IAAI,MAAM,CAAC,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,KAAK,OAAO,EAAE,CAAC;gBACtE,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;YAC9C,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;YAC1E,MAAM,CAAC,GAAG,EAAE,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,OAAO;gBAAE,OAAQ,KAA4B,CAAC,KAAK,CAAC;YAC/E,MAAM,CAAC,UAAU,GAAG,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC;YACnE,MAAM,CAAC,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;YACrF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YACzE,MAAM,CAAC,GAAG,EAAE,CAAC;QACd,CAAC;IAAA,CACD,CAAC,EAAE,CAAC;IAEL,OAAO,MAAM,CAAC;AAAA,CACd,CAAC;AAEF,MAAM,CAAC,MAAM,gCAAgC,GAAkE,CAC9G,KAAsC,EACtC,OAAgB,EAChB,OAA6B,EACC,EAAE,CAAC;IACjC,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,YAAY,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC/D,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,MAAM,IAAI,GAAG,gBAAgB,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;IACtD,MAAM,eAAe,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAEvG,OAAO,0BAA0B,CAAC,KAAK,EAAE,OAAO,EAAE;QACjD,GAAG,IAAI;QACP,eAAe;KACuB,CAAC,CAAC;AAAA,CACzC,CAAC;AAEF,SAAS,qBAAqB,CAAC,OAAe,EAAU;IACvD,OAAO,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AAAA,CACnC;AAED,SAAS,mBAAmB,CAAC,YAAoB,EAAU;IAC1D,OAAO,WAAW,YAAY,6BAA6B,CAAC;AAAA,CAC5D;AAED,SAAS,kBAAkB,CAC1B,KAAsC,EACtC,OAAqC,EACK;IAC1C,MAAM,UAAU,GAAG,OAAO,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,yBAAyB,CAAC;IAEjH,MAAM,OAAO,GAAG,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;IACxG,MAAM,YAAY,GAAG,OAAO,EAAE,iBAAiB,IAAI,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC;IAE1F,IAAI,eAAe,GAAG,OAAO,CAAC;IAE9B,IAAI,CAAC,eAAe,IAAI,YAAY,EAAE,CAAC;QACtC,eAAe,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACvC,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,eAAe,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACd,wJAAwJ,CACxJ,CAAC;IACH,CAAC;IAED,OAAO;QACN,OAAO,EAAE,qBAAqB,CAAC,eAAe,CAAC;QAC/C,UAAU;KACV,CAAC;AAAA,CACF;AAED,SAAS,YAAY,CAAC,KAAsC,EAAE,MAAc,EAAE,OAAqC,EAAE;IACpH,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC;YACvC,MAAM,IAAI,KAAK,CACd,4GAA4G,CAC5G,CAAC;QACH,CAAC;QACD,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;IAC3C,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;IAErC,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;QACtB,MAAM,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,kBAAkB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAEnE,OAAO,IAAI,WAAW,CAAC;QACtB,MAAM;QACN,UAAU;QACV,uBAAuB,EAAE,IAAI;QAC7B,cAAc,EAAE,OAAO;QACvB,OAAO,EAAE,OAAO;KAChB,CAAC,CAAC;AAAA,CACH;AAED,SAAS,WAAW,CACnB,KAAsC,EACtC,OAAgB,EAChB,OAAgD,EAChD,cAAsB,EACrB;IACD,MAAM,QAAQ,GAAG,wBAAwB,CAAC,KAAK,EAAE,OAAO,EAAE,yBAAyB,CAAC,CAAC;IAErF,MAAM,MAAM,GAAkC;QAC7C,KAAK,EAAE,cAAc;QACrB,KAAK,EAAE,QAAQ;QACf,MAAM,EAAE,IAAI;QACZ,gBAAgB,EAAE,OAAO,EAAE,SAAS;KACpC,CAAC;IAEF,IAAI,OAAO,EAAE,SAAS,EAAE,CAAC;QACxB,MAAM,CAAC,iBAAiB,GAAG,OAAO,EAAE,SAAS,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,EAAE,WAAW,KAAK,SAAS,EAAE,CAAC;QACxC,MAAM,CAAC,WAAW,GAAG,OAAO,EAAE,WAAW,CAAC;IAC3C,CAAC;IAED,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QACnB,MAAM,CAAC,KAAK,GAAG,qBAAqB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACrD,CAAC;IAED,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACrB,IAAI,OAAO,EAAE,eAAe,IAAI,OAAO,EAAE,gBAAgB,EAAE,CAAC;YAC3D,MAAM,CAAC,SAAS,GAAG;gBAClB,MAAM,EAAE,OAAO,EAAE,eAAe,IAAI,QAAQ;gBAC5C,OAAO,EAAE,OAAO,EAAE,gBAAgB,IAAI,MAAM;aAC5C,CAAC;YACF,MAAM,CAAC,OAAO,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACP,IAAI,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBAClD,mGAAmG;gBACnG,QAAQ,CAAC,IAAI,CAAC;oBACb,IAAI,EAAE,WAAW;oBACjB,OAAO,EAAE;wBACR;4BACC,IAAI,EAAE,YAAY;4BAClB,IAAI,EAAE,uBAAuB;yBAC7B;qBACD;iBACD,CAAC,CAAC;YACJ,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AAAA,CACd","sourcesContent":["import { AzureOpenAI } from \"openai\";\nimport type { ResponseCreateParamsStreaming } from \"openai/resources/responses/responses.js\";\nimport { getEnvApiKey } from \"../env-api-keys.js\";\nimport { supportsXhigh } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStreamFunction,\n\tStreamOptions,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { convertResponsesMessages, convertResponsesTools, processResponsesStream } from \"./openai-responses-shared.js\";\nimport { buildBaseOptions, clampReasoning } from \"./simple-options.js\";\n\nconst DEFAULT_AZURE_API_VERSION = \"v1\";\nconst AZURE_TOOL_CALL_PROVIDERS = new Set([\"openai\", \"openai-codex\", \"opencode\", \"azure-openai-responses\"]);\n\nfunction parseDeploymentNameMap(value: string | undefined): Map<string, string> {\n\tconst map = new Map<string, string>();\n\tif (!value) return map;\n\tfor (const entry of value.split(\",\")) {\n\t\tconst trimmed = entry.trim();\n\t\tif (!trimmed) continue;\n\t\tconst [modelId, deploymentName] = trimmed.split(\"=\", 2);\n\t\tif (!modelId || !deploymentName) continue;\n\t\tmap.set(modelId.trim(), deploymentName.trim());\n\t}\n\treturn map;\n}\n\nfunction resolveDeploymentName(model: Model<\"azure-openai-responses\">, options?: AzureOpenAIResponsesOptions): string {\n\tif (options?.azureDeploymentName) {\n\t\treturn options.azureDeploymentName;\n\t}\n\tconst mappedDeployment = parseDeploymentNameMap(process.env.AZURE_OPENAI_DEPLOYMENT_NAME_MAP).get(model.id);\n\treturn mappedDeployment || model.id;\n}\n\n// Azure OpenAI Responses-specific options\nexport interface AzureOpenAIResponsesOptions extends StreamOptions {\n\treasoningEffort?: \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\";\n\treasoningSummary?: \"auto\" | \"detailed\" | \"concise\" | null;\n\tazureApiVersion?: string;\n\tazureResourceName?: string;\n\tazureBaseUrl?: string;\n\tazureDeploymentName?: string;\n}\n\n/**\n * Generate function for Azure OpenAI Responses API\n */\nexport const streamAzureOpenAIResponses: StreamFunction<\"azure-openai-responses\", AzureOpenAIResponsesOptions> = (\n\tmodel: Model<\"azure-openai-responses\">,\n\tcontext: Context,\n\toptions?: AzureOpenAIResponsesOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t// Start async processing\n\t(async () => {\n\t\tconst deploymentName = resolveDeploymentName(model, options);\n\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"azure-openai-responses\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\t// Create Azure OpenAI client\n\t\t\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider) || \"\";\n\t\t\tconst client = createClient(model, apiKey, options);\n\t\t\tlet params = buildParams(model, context, options, deploymentName);\n\t\t\tconst nextParams = await options?.onPayload?.(params, model);\n\t\t\tif (nextParams !== undefined) {\n\t\t\t\tparams = nextParams as ResponseCreateParamsStreaming;\n\t\t\t}\n\t\t\tconst openaiStream = await client.responses.create(\n\t\t\t\tparams,\n\t\t\t\toptions?.signal ? { signal: options.signal } : undefined,\n\t\t\t);\n\t\t\tstream.push({ type: \"start\", partial: output });\n\n\t\t\tawait processResponsesStream(openaiStream, output, stream, model);\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) delete (block as { index?: number }).index;\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleAzureOpenAIResponses: StreamFunction<\"azure-openai-responses\", SimpleStreamOptions> = (\n\tmodel: Model<\"azure-openai-responses\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider);\n\tif (!apiKey) {\n\t\tthrow new Error(`No API key for provider: ${model.provider}`);\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tconst reasoningEffort = supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning);\n\n\treturn streamAzureOpenAIResponses(model, context, {\n\t\t...base,\n\t\treasoningEffort,\n\t} satisfies AzureOpenAIResponsesOptions);\n};\n\nfunction normalizeAzureBaseUrl(baseUrl: string): string {\n\treturn baseUrl.replace(/\\/+$/, \"\");\n}\n\nfunction buildDefaultBaseUrl(resourceName: string): string {\n\treturn `https://${resourceName}.openai.azure.com/openai/v1`;\n}\n\nfunction resolveAzureConfig(\n\tmodel: Model<\"azure-openai-responses\">,\n\toptions?: AzureOpenAIResponsesOptions,\n): { baseUrl: string; apiVersion: string } {\n\tconst apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;\n\n\tconst baseUrl = options?.azureBaseUrl?.trim() || process.env.AZURE_OPENAI_BASE_URL?.trim() || undefined;\n\tconst resourceName = options?.azureResourceName || process.env.AZURE_OPENAI_RESOURCE_NAME;\n\n\tlet resolvedBaseUrl = baseUrl;\n\n\tif (!resolvedBaseUrl && resourceName) {\n\t\tresolvedBaseUrl = buildDefaultBaseUrl(resourceName);\n\t}\n\n\tif (!resolvedBaseUrl && model.baseUrl) {\n\t\tresolvedBaseUrl = model.baseUrl;\n\t}\n\n\tif (!resolvedBaseUrl) {\n\t\tthrow new Error(\n\t\t\t\"Azure OpenAI base URL is required. Set AZURE_OPENAI_BASE_URL or AZURE_OPENAI_RESOURCE_NAME, or pass azureBaseUrl, azureResourceName, or model.baseUrl.\",\n\t\t);\n\t}\n\n\treturn {\n\t\tbaseUrl: normalizeAzureBaseUrl(resolvedBaseUrl),\n\t\tapiVersion,\n\t};\n}\n\nfunction createClient(model: Model<\"azure-openai-responses\">, apiKey: string, options?: AzureOpenAIResponsesOptions) {\n\tif (!apiKey) {\n\t\tif (!process.env.AZURE_OPENAI_API_KEY) {\n\t\t\tthrow new Error(\n\t\t\t\t\"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.\",\n\t\t\t);\n\t\t}\n\t\tapiKey = process.env.AZURE_OPENAI_API_KEY;\n\t}\n\n\tconst headers = { ...model.headers };\n\n\tif (options?.headers) {\n\t\tObject.assign(headers, options.headers);\n\t}\n\n\tconst { baseUrl, apiVersion } = resolveAzureConfig(model, options);\n\n\treturn new AzureOpenAI({\n\t\tapiKey,\n\t\tapiVersion,\n\t\tdangerouslyAllowBrowser: true,\n\t\tdefaultHeaders: headers,\n\t\tbaseURL: baseUrl,\n\t});\n}\n\nfunction buildParams(\n\tmodel: Model<\"azure-openai-responses\">,\n\tcontext: Context,\n\toptions: AzureOpenAIResponsesOptions | undefined,\n\tdeploymentName: string,\n) {\n\tconst messages = convertResponsesMessages(model, context, AZURE_TOOL_CALL_PROVIDERS);\n\n\tconst params: ResponseCreateParamsStreaming = {\n\t\tmodel: deploymentName,\n\t\tinput: messages,\n\t\tstream: true,\n\t\tprompt_cache_key: options?.sessionId,\n\t};\n\n\tif (options?.maxTokens) {\n\t\tparams.max_output_tokens = options?.maxTokens;\n\t}\n\n\tif (options?.temperature !== undefined) {\n\t\tparams.temperature = options?.temperature;\n\t}\n\n\tif (context.tools) {\n\t\tparams.tools = convertResponsesTools(context.tools);\n\t}\n\n\tif (model.reasoning) {\n\t\tif (options?.reasoningEffort || options?.reasoningSummary) {\n\t\t\tparams.reasoning = {\n\t\t\t\teffort: options?.reasoningEffort || \"medium\",\n\t\t\t\tsummary: options?.reasoningSummary || \"auto\",\n\t\t\t};\n\t\t\tparams.include = [\"reasoning.encrypted_content\"];\n\t\t} else {\n\t\t\tif (model.name.toLowerCase().startsWith(\"gpt-5\")) {\n\t\t\t\t// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7\n\t\t\t\tmessages.push({\n\t\t\t\t\trole: \"developer\",\n\t\t\t\t\tcontent: [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\ttype: \"input_text\",\n\t\t\t\t\t\t\ttext: \"# Juice: 0 !important\",\n\t\t\t\t\t\t},\n\t\t\t\t\t],\n\t\t\t\t});\n\t\t\t}\n\t\t}\n\t}\n\n\treturn params;\n}\n"]}
@@ -1 +1 @@
1
- {"version":3,"file":"google-gemini-cli.d.ts","sourceRoot":"","sources":["../../src/providers/google-gemini-cli.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAE7D,OAAO,KAAK,EAGX,OAAO,EACP,KAAK,EACL,mBAAmB,EACnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AAGrB,OAAO,EAEN,YAAY,EAGZ,aAAa,EAEb,MAAM,oBAAoB,CAAC;AAG5B;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,GAAG,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEvG,MAAM,WAAW,sBAAuB,SAAQ,aAAa;IAC5D,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,CAAC,EAAE;QACV,OAAO,EAAE,OAAO,CAAC;QACjB,4DAA4D;QAC5D,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,qGAAqG;QACrG,KAAK,CAAC,EAAE,mBAAmB,CAAC;KAC5B,CAAC;IACF,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAiDD;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,SAAS,CAyFtG;AA8DD,UAAU,sBAAsB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QACR,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,iBAAiB,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE;gBAAE,IAAI,EAAE,MAAM,CAAA;aAAE,EAAE,CAAA;SAAE,CAAC;QACjE,gBAAgB,CAAC,EAAE;YAClB,eAAe,CAAC,EAAE,MAAM,CAAC;YACzB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,cAAc,CAAC,EAAE,cAAc,CAAC;SAChC,CAAC;QACF,KAAK,CAAC,EAAE,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;QACxC,UAAU,CAAC,EAAE;YACZ,qBAAqB,EAAE;gBACtB,IAAI,EAAE,UAAU,CAAC,OAAO,aAAa,CAAC,CAAC;aACvC,CAAC;SACF,CAAC;KACF,CAAC;IACF,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAiCD,eAAO,MAAM,qBAAqB,EAAE,cAAc,CAAC,mBAAmB,EAAE,sBAAsB,CAkd7F,CAAC;AAEF,eAAO,MAAM,2BAA2B,EAAE,cAAc,CAAC,mBAAmB,EAAE,mBAAmB,CAqDhG,CAAC;AAEF,wBAAgB,YAAY,CAC3B,KAAK,EAAE,KAAK,CAAC,mBAAmB,CAAC,EACjC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,sBAA2B,EACpC,aAAa,UAAQ,GACnB,sBAAsB,CA4ExB","sourcesContent":["/**\n * Google Gemini CLI / Antigravity provider.\n * Shared implementation for both google-gemini-cli and google-antigravity providers.\n * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.\n */\n\nimport type { Content, ThinkingConfig } from \"@google/genai\";\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingBudgets,\n\tThinkingContent,\n\tThinkingLevel,\n\tToolCall,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport {\n\tconvertMessages,\n\tconvertTools,\n\tisThinkingPart,\n\tmapStopReasonString,\n\tmapToolChoice,\n\tretainThoughtSignature,\n} from \"./google-shared.js\";\nimport { buildBaseOptions, clampReasoning } from \"./simple-options.js\";\n\n/**\n * Thinking level for Gemini 3 models.\n * Mirrors Google's ThinkingLevel enum values.\n */\nexport type GoogleThinkingLevel = \"THINKING_LEVEL_UNSPECIFIED\" | \"MINIMAL\" | \"LOW\" | \"MEDIUM\" | \"HIGH\";\n\nexport interface GoogleGeminiCliOptions extends StreamOptions {\n\ttoolChoice?: \"auto\" | \"none\" | \"any\";\n\t/**\n\t * Thinking/reasoning configuration.\n\t * - Gemini 2.x models: use `budgetTokens` to set the thinking budget\n\t * - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead\n\t *\n\t * When using `streamSimple`, this is handled automatically based on the model.\n\t */\n\tthinking?: {\n\t\tenabled: boolean;\n\t\t/** Thinking budget in tokens. Use for Gemini 2.x models. */\n\t\tbudgetTokens?: number;\n\t\t/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */\n\t\tlevel?: GoogleThinkingLevel;\n\t};\n\tprojectId?: string;\n}\n\nconst DEFAULT_ENDPOINT = \"https://cloudcode-pa.googleapis.com\";\nconst ANTIGRAVITY_DAILY_ENDPOINT = \"https://daily-cloudcode-pa.sandbox.googleapis.com\";\nconst ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;\n// Headers for Gemini CLI (prod endpoint)\nconst GEMINI_CLI_HEADERS = {\n\t\"User-Agent\": \"google-cloud-sdk vscode_cloudshelleditor/0.1\",\n\t\"X-Goog-Api-Client\": \"gl-node/22.17.0\",\n\t\"Client-Metadata\": JSON.stringify({\n\t\tideType: \"IDE_UNSPECIFIED\",\n\t\tplatform: \"PLATFORM_UNSPECIFIED\",\n\t\tpluginType: \"GEMINI\",\n\t}),\n};\n\n// Headers for Antigravity (sandbox endpoint) - requires specific User-Agent\nconst DEFAULT_ANTIGRAVITY_VERSION = \"1.18.3\";\n\nfunction getAntigravityHeaders() {\n\tconst version = process.env.DRAHT_AI_ANTIGRAVITY_VERSION || DEFAULT_ANTIGRAVITY_VERSION;\n\treturn {\n\t\t\"User-Agent\": `antigravity/${version} darwin/arm64`,\n\t\t\"X-Goog-Api-Client\": \"google-cloud-sdk vscode_cloudshelleditor/0.1\",\n\t\t\"Client-Metadata\": JSON.stringify({\n\t\t\tideType: \"IDE_UNSPECIFIED\",\n\t\t\tplatform: \"PLATFORM_UNSPECIFIED\",\n\t\t\tpluginType: \"GEMINI\",\n\t\t}),\n\t};\n}\n\n// Antigravity system instruction (compact version from CLIProxyAPI).\nconst ANTIGRAVITY_SYSTEM_INSTRUCTION =\n\t\"You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.\" +\n\t\"You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.\" +\n\t\"**Absolute paths only**\" +\n\t\"**Proactiveness**\";\n\n// Counter for generating unique tool call IDs\nlet toolCallCounter = 0;\n\n// Retry configuration\nconst MAX_RETRIES = 3;\nconst BASE_DELAY_MS = 1000;\nconst MAX_EMPTY_STREAM_RETRIES = 2;\nconst EMPTY_STREAM_BASE_DELAY_MS = 500;\nconst CLAUDE_THINKING_BETA_HEADER = \"interleaved-thinking-2025-05-14\";\n\n/**\n * Extract retry delay from Gemini error response (in milliseconds).\n * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),\n * then parses body patterns like:\n * - \"Your quota will reset after 39s\"\n * - \"Your quota will reset after 18h31m10s\"\n * - \"Please retry in Xs\" or \"Please retry in Xms\"\n * - \"retryDelay\": \"34.074824224s\" (JSON field)\n */\nexport function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {\n\tconst normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);\n\n\tconst headers = response instanceof Headers ? response : response?.headers;\n\tif (headers) {\n\t\tconst retryAfter = headers.get(\"retry-after\");\n\t\tif (retryAfter) {\n\t\t\tconst retryAfterSeconds = Number(retryAfter);\n\t\t\tif (Number.isFinite(retryAfterSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(retryAfterSeconds * 1000);\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t\tconst retryAfterDate = new Date(retryAfter);\n\t\t\tconst retryAfterMs = retryAfterDate.getTime();\n\t\t\tif (!Number.isNaN(retryAfterMs)) {\n\t\t\t\tconst delay = normalizeDelay(retryAfterMs - Date.now());\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tconst rateLimitReset = headers.get(\"x-ratelimit-reset\");\n\t\tif (rateLimitReset) {\n\t\t\tconst resetSeconds = Number.parseInt(rateLimitReset, 10);\n\t\t\tif (!Number.isNaN(resetSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(resetSeconds * 1000 - Date.now());\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tconst rateLimitResetAfter = headers.get(\"x-ratelimit-reset-after\");\n\t\tif (rateLimitResetAfter) {\n\t\t\tconst resetAfterSeconds = Number(rateLimitResetAfter);\n\t\t\tif (Number.isFinite(resetAfterSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(resetAfterSeconds * 1000);\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 1: \"Your quota will reset after ...\" (formats: \"18h31m10s\", \"10m15s\", \"6s\", \"39s\")\n\tconst durationMatch = errorText.match(/reset after (?:(\\d+)h)?(?:(\\d+)m)?(\\d+(?:\\.\\d+)?)s/i);\n\tif (durationMatch) {\n\t\tconst hours = durationMatch[1] ? parseInt(durationMatch[1], 10) : 0;\n\t\tconst minutes = durationMatch[2] ? parseInt(durationMatch[2], 10) : 0;\n\t\tconst seconds = parseFloat(durationMatch[3]);\n\t\tif (!Number.isNaN(seconds)) {\n\t\t\tconst totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;\n\t\t\tconst delay = normalizeDelay(totalMs);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 2: \"Please retry in X[ms|s]\"\n\tconst retryInMatch = errorText.match(/Please retry in ([0-9.]+)(ms|s)/i);\n\tif (retryInMatch?.[1]) {\n\t\tconst value = parseFloat(retryInMatch[1]);\n\t\tif (!Number.isNaN(value) && value > 0) {\n\t\t\tconst ms = retryInMatch[2].toLowerCase() === \"ms\" ? value : value * 1000;\n\t\t\tconst delay = normalizeDelay(ms);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 3: \"retryDelay\": \"34.074824224s\" (JSON field in error details)\n\tconst retryDelayMatch = errorText.match(/\"retryDelay\":\\s*\"([0-9.]+)(ms|s)\"/i);\n\tif (retryDelayMatch?.[1]) {\n\t\tconst value = parseFloat(retryDelayMatch[1]);\n\t\tif (!Number.isNaN(value) && value > 0) {\n\t\t\tconst ms = retryDelayMatch[2].toLowerCase() === \"ms\" ? value : value * 1000;\n\t\t\tconst delay = normalizeDelay(ms);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn undefined;\n}\n\nfunction isClaudeThinkingModel(modelId: string): boolean {\n\tconst normalized = modelId.toLowerCase();\n\treturn normalized.includes(\"claude\") && normalized.includes(\"thinking\");\n}\n\nfunction isGemini3ProModel(modelId: string): boolean {\n\treturn /gemini-3(?:\\.1)?-pro/.test(modelId.toLowerCase());\n}\n\nfunction isGemini3FlashModel(modelId: string): boolean {\n\treturn /gemini-3(?:\\.1)?-flash/.test(modelId.toLowerCase());\n}\n\nfunction isGemini3Model(modelId: string): boolean {\n\treturn isGemini3ProModel(modelId) || isGemini3FlashModel(modelId);\n}\n\n/**\n * Check if an error is retryable (rate limit, server error, network error, etc.)\n */\nfunction isRetryableError(status: number, errorText: string): boolean {\n\tif (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {\n\t\treturn true;\n\t}\n\treturn /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);\n}\n\n/**\n * Extract a clean, user-friendly error message from Google API error response.\n * Parses JSON error responses and returns just the message field.\n */\nfunction extractErrorMessage(errorText: string): string {\n\ttry {\n\t\tconst parsed = JSON.parse(errorText) as { error?: { message?: string } };\n\t\tif (parsed.error?.message) {\n\t\t\treturn parsed.error.message;\n\t\t}\n\t} catch {\n\t\t// Not JSON, return as-is\n\t}\n\treturn errorText;\n}\n\n/**\n * Sleep for a given number of milliseconds, respecting abort signal.\n */\nfunction sleep(ms: number, signal?: AbortSignal): Promise<void> {\n\treturn new Promise((resolve, reject) => {\n\t\tif (signal?.aborted) {\n\t\t\treject(new Error(\"Request was aborted\"));\n\t\t\treturn;\n\t\t}\n\t\tconst timeout = setTimeout(resolve, ms);\n\t\tsignal?.addEventListener(\"abort\", () => {\n\t\t\tclearTimeout(timeout);\n\t\t\treject(new Error(\"Request was aborted\"));\n\t\t});\n\t});\n}\n\ninterface CloudCodeAssistRequest {\n\tproject: string;\n\tmodel: string;\n\trequest: {\n\t\tcontents: Content[];\n\t\tsessionId?: string;\n\t\tsystemInstruction?: { role?: string; parts: { text: string }[] };\n\t\tgenerationConfig?: {\n\t\t\tmaxOutputTokens?: number;\n\t\t\ttemperature?: number;\n\t\t\tthinkingConfig?: ThinkingConfig;\n\t\t};\n\t\ttools?: ReturnType<typeof convertTools>;\n\t\ttoolConfig?: {\n\t\t\tfunctionCallingConfig: {\n\t\t\t\tmode: ReturnType<typeof mapToolChoice>;\n\t\t\t};\n\t\t};\n\t};\n\trequestType?: string;\n\tuserAgent?: string;\n\trequestId?: string;\n}\n\ninterface CloudCodeAssistResponseChunk {\n\tresponse?: {\n\t\tcandidates?: Array<{\n\t\t\tcontent?: {\n\t\t\t\trole: string;\n\t\t\t\tparts?: Array<{\n\t\t\t\t\ttext?: string;\n\t\t\t\t\tthought?: boolean;\n\t\t\t\t\tthoughtSignature?: string;\n\t\t\t\t\tfunctionCall?: {\n\t\t\t\t\t\tname: string;\n\t\t\t\t\t\targs: Record<string, unknown>;\n\t\t\t\t\t\tid?: string;\n\t\t\t\t\t};\n\t\t\t\t}>;\n\t\t\t};\n\t\t\tfinishReason?: string;\n\t\t}>;\n\t\tusageMetadata?: {\n\t\t\tpromptTokenCount?: number;\n\t\t\tcandidatesTokenCount?: number;\n\t\t\tthoughtsTokenCount?: number;\n\t\t\ttotalTokenCount?: number;\n\t\t\tcachedContentTokenCount?: number;\n\t\t};\n\t\tmodelVersion?: string;\n\t\tresponseId?: string;\n\t};\n\ttraceId?: string;\n}\n\nexport const streamGoogleGeminiCli: StreamFunction<\"google-gemini-cli\", GoogleGeminiCliOptions> = (\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\toptions?: GoogleGeminiCliOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"google-gemini-cli\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\t// apiKey is JSON-encoded: { token, projectId }\n\t\t\tconst apiKeyRaw = options?.apiKey;\n\t\t\tif (!apiKeyRaw) {\n\t\t\t\tthrow new Error(\"Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.\");\n\t\t\t}\n\n\t\t\tlet accessToken: string;\n\t\t\tlet projectId: string;\n\n\t\t\ttry {\n\t\t\t\tconst parsed = JSON.parse(apiKeyRaw) as { token: string; projectId: string };\n\t\t\t\taccessToken = parsed.token;\n\t\t\t\tprojectId = parsed.projectId;\n\t\t\t} catch {\n\t\t\t\tthrow new Error(\"Invalid Google Cloud Code Assist credentials. Use /login to re-authenticate.\");\n\t\t\t}\n\n\t\t\tif (!accessToken || !projectId) {\n\t\t\t\tthrow new Error(\"Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.\");\n\t\t\t}\n\n\t\t\tconst isAntigravity = model.provider === \"google-antigravity\";\n\t\t\tconst baseUrl = model.baseUrl?.trim();\n\t\t\tconst endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];\n\n\t\t\tconst requestBody = buildRequest(model, context, projectId, options, isAntigravity);\n\t\t\toptions?.onPayload?.(requestBody);\n\t\t\tconst headers = isAntigravity ? getAntigravityHeaders() : GEMINI_CLI_HEADERS;\n\n\t\t\tconst requestHeaders = {\n\t\t\t\tAuthorization: `Bearer ${accessToken}`,\n\t\t\t\t\"Content-Type\": \"application/json\",\n\t\t\t\tAccept: \"text/event-stream\",\n\t\t\t\t...headers,\n\t\t\t\t...(isClaudeThinkingModel(model.id) ? { \"anthropic-beta\": CLAUDE_THINKING_BETA_HEADER } : {}),\n\t\t\t\t...options?.headers,\n\t\t\t};\n\t\t\tconst requestBodyJson = JSON.stringify(requestBody);\n\n\t\t\t// Fetch with retry logic for rate limits and transient errors\n\t\t\tlet response: Response | undefined;\n\t\t\tlet lastError: Error | undefined;\n\t\t\tlet requestUrl: string | undefined;\n\n\t\t\tfor (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {\n\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t}\n\n\t\t\t\ttry {\n\t\t\t\t\tconst endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];\n\t\t\t\t\trequestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;\n\t\t\t\t\tresponse = await fetch(requestUrl, {\n\t\t\t\t\t\tmethod: \"POST\",\n\t\t\t\t\t\theaders: requestHeaders,\n\t\t\t\t\t\tbody: requestBodyJson,\n\t\t\t\t\t\tsignal: options?.signal,\n\t\t\t\t\t});\n\n\t\t\t\t\tif (response.ok) {\n\t\t\t\t\t\tbreak; // Success, exit retry loop\n\t\t\t\t\t}\n\n\t\t\t\t\tconst errorText = await response.text();\n\n\t\t\t\t\t// Check if retryable\n\t\t\t\t\tif (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {\n\t\t\t\t\t\t// Use server-provided delay or exponential backoff\n\t\t\t\t\t\tconst serverDelay = extractRetryDelay(errorText, response);\n\t\t\t\t\t\tconst delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;\n\n\t\t\t\t\t\t// Check if server delay exceeds max allowed (default: 60s)\n\t\t\t\t\t\tconst maxDelayMs = options?.maxRetryDelayMs ?? 60000;\n\t\t\t\t\t\tif (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {\n\t\t\t\t\t\t\tconst delaySeconds = Math.ceil(serverDelay / 1000);\n\t\t\t\t\t\t\tthrow new Error(\n\t\t\t\t\t\t\t\t`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,\n\t\t\t\t\t\t\t);\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tawait sleep(delayMs, options?.signal);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Not retryable or max retries exceeded\n\t\t\t\t\tthrow new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);\n\t\t\t\t} catch (error) {\n\t\t\t\t\t// Check for abort - fetch throws AbortError, our code throws \"Request was aborted\"\n\t\t\t\t\tif (error instanceof Error) {\n\t\t\t\t\t\tif (error.name === \"AbortError\" || error.message === \"Request was aborted\") {\n\t\t\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\t// Extract detailed error message from fetch errors (Node includes cause)\n\t\t\t\t\tlastError = error instanceof Error ? error : new Error(String(error));\n\t\t\t\t\tif (lastError.message === \"fetch failed\" && lastError.cause instanceof Error) {\n\t\t\t\t\t\tlastError = new Error(`Network error: ${lastError.cause.message}`);\n\t\t\t\t\t}\n\t\t\t\t\t// Network errors are retryable\n\t\t\t\t\tif (attempt < MAX_RETRIES) {\n\t\t\t\t\t\tconst delayMs = BASE_DELAY_MS * 2 ** attempt;\n\t\t\t\t\t\tawait sleep(delayMs, options?.signal);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t\tthrow lastError;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (!response || !response.ok) {\n\t\t\t\tthrow lastError ?? new Error(\"Failed to get response after retries\");\n\t\t\t}\n\n\t\t\tlet started = false;\n\t\t\tconst ensureStarted = () => {\n\t\t\t\tif (!started) {\n\t\t\t\t\tstream.push({ type: \"start\", partial: output });\n\t\t\t\t\tstarted = true;\n\t\t\t\t}\n\t\t\t};\n\n\t\t\tconst resetOutput = () => {\n\t\t\t\toutput.content = [];\n\t\t\t\toutput.usage = {\n\t\t\t\t\tinput: 0,\n\t\t\t\t\toutput: 0,\n\t\t\t\t\tcacheRead: 0,\n\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\ttotalTokens: 0,\n\t\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t\t};\n\t\t\t\toutput.stopReason = \"stop\";\n\t\t\t\toutput.errorMessage = undefined;\n\t\t\t\toutput.timestamp = Date.now();\n\t\t\t\tstarted = false;\n\t\t\t};\n\n\t\t\tconst streamResponse = async (activeResponse: Response): Promise<boolean> => {\n\t\t\t\tif (!activeResponse.body) {\n\t\t\t\t\tthrow new Error(\"No response body\");\n\t\t\t\t}\n\n\t\t\t\tlet hasContent = false;\n\t\t\t\tlet currentBlock: TextContent | ThinkingContent | null = null;\n\t\t\t\tconst blocks = output.content;\n\t\t\t\tconst blockIndex = () => blocks.length - 1;\n\n\t\t\t\t// Read SSE stream\n\t\t\t\tconst reader = activeResponse.body.getReader();\n\t\t\t\tconst decoder = new TextDecoder();\n\t\t\t\tlet buffer = \"\";\n\n\t\t\t\t// Set up abort handler to cancel reader when signal fires\n\t\t\t\tconst abortHandler = () => {\n\t\t\t\t\tvoid reader.cancel().catch(() => {});\n\t\t\t\t};\n\t\t\t\toptions?.signal?.addEventListener(\"abort\", abortHandler);\n\n\t\t\t\ttry {\n\t\t\t\t\twhile (true) {\n\t\t\t\t\t\t// Check abort signal before each read\n\t\t\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tconst { done, value } = await reader.read();\n\t\t\t\t\t\tif (done) break;\n\n\t\t\t\t\t\tbuffer += decoder.decode(value, { stream: true });\n\t\t\t\t\t\tconst lines = buffer.split(\"\\n\");\n\t\t\t\t\t\tbuffer = lines.pop() || \"\";\n\n\t\t\t\t\t\tfor (const line of lines) {\n\t\t\t\t\t\t\tif (!line.startsWith(\"data:\")) continue;\n\n\t\t\t\t\t\t\tconst jsonStr = line.slice(5).trim();\n\t\t\t\t\t\t\tif (!jsonStr) continue;\n\n\t\t\t\t\t\t\tlet chunk: CloudCodeAssistResponseChunk;\n\t\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\t\tchunk = JSON.parse(jsonStr);\n\t\t\t\t\t\t\t} catch {\n\t\t\t\t\t\t\t\tcontinue;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t// Unwrap the response\n\t\t\t\t\t\t\tconst responseData = chunk.response;\n\t\t\t\t\t\t\tif (!responseData) continue;\n\n\t\t\t\t\t\t\tconst candidate = responseData.candidates?.[0];\n\t\t\t\t\t\t\tif (candidate?.content?.parts) {\n\t\t\t\t\t\t\t\tfor (const part of candidate.content.parts) {\n\t\t\t\t\t\t\t\t\tif (part.text !== undefined) {\n\t\t\t\t\t\t\t\t\t\thasContent = true;\n\t\t\t\t\t\t\t\t\t\tconst isThinking = isThinkingPart(part);\n\t\t\t\t\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t\t\t\t\t!currentBlock ||\n\t\t\t\t\t\t\t\t\t\t\t(isThinking && currentBlock.type !== \"thinking\") ||\n\t\t\t\t\t\t\t\t\t\t\t(!isThinking && currentBlock.type !== \"text\")\n\t\t\t\t\t\t\t\t\t\t) {\n\t\t\t\t\t\t\t\t\t\t\tif (currentBlock) {\n\t\t\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blocks.length - 1,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\tif (isThinking) {\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = { type: \"thinking\", thinking: \"\", thinkingSignature: undefined };\n\t\t\t\t\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_start\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = { type: \"text\", text: \"\" };\n\t\t\t\t\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"thinking\") {\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinking += part.text;\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinkingSignature = retainThoughtSignature(\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinkingSignature,\n\t\t\t\t\t\t\t\t\t\t\t\tpart.thoughtSignature,\n\t\t\t\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\tdelta: part.text,\n\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.text += part.text;\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.textSignature = retainThoughtSignature(\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.textSignature,\n\t\t\t\t\t\t\t\t\t\t\t\tpart.thoughtSignature,\n\t\t\t\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\tdelta: part.text,\n\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t\t\tif (part.functionCall) {\n\t\t\t\t\t\t\t\t\t\thasContent = true;\n\t\t\t\t\t\t\t\t\t\tif (currentBlock) {\n\t\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = null;\n\t\t\t\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t\t\t\tconst providedId = part.functionCall.id;\n\t\t\t\t\t\t\t\t\t\tconst needsNewId =\n\t\t\t\t\t\t\t\t\t\t\t!providedId ||\n\t\t\t\t\t\t\t\t\t\t\toutput.content.some((b) => b.type === \"toolCall\" && b.id === providedId);\n\t\t\t\t\t\t\t\t\t\tconst toolCallId = needsNewId\n\t\t\t\t\t\t\t\t\t\t\t? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`\n\t\t\t\t\t\t\t\t\t\t\t: providedId;\n\n\t\t\t\t\t\t\t\t\t\tconst toolCall: ToolCall = {\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\t\t\t\t\t\tid: toolCallId,\n\t\t\t\t\t\t\t\t\t\t\tname: part.functionCall.name || \"\",\n\t\t\t\t\t\t\t\t\t\t\targuments: (part.functionCall.args as Record<string, unknown>) ?? {},\n\t\t\t\t\t\t\t\t\t\t\t...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),\n\t\t\t\t\t\t\t\t\t\t};\n\n\t\t\t\t\t\t\t\t\t\toutput.content.push(toolCall);\n\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\tstream.push({ type: \"toolcall_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\tdelta: JSON.stringify(toolCall.arguments),\n\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\ttoolCall,\n\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (candidate?.finishReason) {\n\t\t\t\t\t\t\t\toutput.stopReason = mapStopReasonString(candidate.finishReason);\n\t\t\t\t\t\t\t\tif (output.content.some((b) => b.type === \"toolCall\")) {\n\t\t\t\t\t\t\t\t\toutput.stopReason = \"toolUse\";\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (responseData.usageMetadata) {\n\t\t\t\t\t\t\t\t// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input\n\t\t\t\t\t\t\t\tconst promptTokens = responseData.usageMetadata.promptTokenCount || 0;\n\t\t\t\t\t\t\t\tconst cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;\n\t\t\t\t\t\t\t\toutput.usage = {\n\t\t\t\t\t\t\t\t\tinput: promptTokens - cacheReadTokens,\n\t\t\t\t\t\t\t\t\toutput:\n\t\t\t\t\t\t\t\t\t\t(responseData.usageMetadata.candidatesTokenCount || 0) +\n\t\t\t\t\t\t\t\t\t\t(responseData.usageMetadata.thoughtsTokenCount || 0),\n\t\t\t\t\t\t\t\t\tcacheRead: cacheReadTokens,\n\t\t\t\t\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\t\t\t\t\ttotalTokens: responseData.usageMetadata.totalTokenCount || 0,\n\t\t\t\t\t\t\t\t\tcost: {\n\t\t\t\t\t\t\t\t\t\tinput: 0,\n\t\t\t\t\t\t\t\t\t\toutput: 0,\n\t\t\t\t\t\t\t\t\t\tcacheRead: 0,\n\t\t\t\t\t\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\t\t\t\t\t\ttotal: 0,\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} finally {\n\t\t\t\t\toptions?.signal?.removeEventListener(\"abort\", abortHandler);\n\t\t\t\t}\n\n\t\t\t\tif (currentBlock) {\n\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t} else {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\treturn hasContent;\n\t\t\t};\n\n\t\t\tlet receivedContent = false;\n\t\t\tlet currentResponse = response;\n\n\t\t\tfor (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {\n\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t}\n\n\t\t\t\tif (emptyAttempt > 0) {\n\t\t\t\t\tconst backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);\n\t\t\t\t\tawait sleep(backoffMs, options?.signal);\n\n\t\t\t\t\tif (!requestUrl) {\n\t\t\t\t\t\tthrow new Error(\"Missing request URL\");\n\t\t\t\t\t}\n\n\t\t\t\t\tcurrentResponse = await fetch(requestUrl, {\n\t\t\t\t\t\tmethod: \"POST\",\n\t\t\t\t\t\theaders: requestHeaders,\n\t\t\t\t\t\tbody: requestBodyJson,\n\t\t\t\t\t\tsignal: options?.signal,\n\t\t\t\t\t});\n\n\t\t\t\t\tif (!currentResponse.ok) {\n\t\t\t\t\t\tconst retryErrorText = await currentResponse.text();\n\t\t\t\t\t\tthrow new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst streamed = await streamResponse(currentResponse);\n\t\t\t\tif (streamed) {\n\t\t\t\t\treceivedContent = true;\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\n\t\t\t\tif (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {\n\t\t\t\t\tresetOutput();\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (!receivedContent) {\n\t\t\t\tthrow new Error(\"Cloud Code Assist API returned an empty response\");\n\t\t\t}\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tif (\"index\" in block) {\n\t\t\t\t\tdelete (block as { index?: number }).index;\n\t\t\t\t}\n\t\t\t}\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleGoogleGeminiCli: StreamFunction<\"google-gemini-cli\", SimpleStreamOptions> = (\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey;\n\tif (!apiKey) {\n\t\tthrow new Error(\"Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.\");\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tif (!options?.reasoning) {\n\t\treturn streamGoogleGeminiCli(model, context, {\n\t\t\t...base,\n\t\t\tthinking: { enabled: false },\n\t\t} satisfies GoogleGeminiCliOptions);\n\t}\n\n\tconst effort = clampReasoning(options.reasoning)!;\n\tif (isGemini3Model(model.id)) {\n\t\treturn streamGoogleGeminiCli(model, context, {\n\t\t\t...base,\n\t\t\tthinking: {\n\t\t\t\tenabled: true,\n\t\t\t\tlevel: getGeminiCliThinkingLevel(effort, model.id),\n\t\t\t},\n\t\t} satisfies GoogleGeminiCliOptions);\n\t}\n\n\tconst defaultBudgets: ThinkingBudgets = {\n\t\tminimal: 1024,\n\t\tlow: 2048,\n\t\tmedium: 8192,\n\t\thigh: 16384,\n\t};\n\tconst budgets = { ...defaultBudgets, ...options.thinkingBudgets };\n\n\tconst minOutputTokens = 1024;\n\tlet thinkingBudget = budgets[effort]!;\n\tconst maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);\n\n\tif (maxTokens <= thinkingBudget) {\n\t\tthinkingBudget = Math.max(0, maxTokens - minOutputTokens);\n\t}\n\n\treturn streamGoogleGeminiCli(model, context, {\n\t\t...base,\n\t\tmaxTokens,\n\t\tthinking: {\n\t\t\tenabled: true,\n\t\t\tbudgetTokens: thinkingBudget,\n\t\t},\n\t} satisfies GoogleGeminiCliOptions);\n};\n\nexport function buildRequest(\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\tprojectId: string,\n\toptions: GoogleGeminiCliOptions = {},\n\tisAntigravity = false,\n): CloudCodeAssistRequest {\n\tconst contents = convertMessages(model, context);\n\n\tconst generationConfig: CloudCodeAssistRequest[\"request\"][\"generationConfig\"] = {};\n\tif (options.temperature !== undefined) {\n\t\tgenerationConfig.temperature = options.temperature;\n\t}\n\tif (options.maxTokens !== undefined) {\n\t\tgenerationConfig.maxOutputTokens = options.maxTokens;\n\t}\n\n\t// Thinking config\n\tif (options.thinking?.enabled && model.reasoning) {\n\t\tgenerationConfig.thinkingConfig = {\n\t\t\tincludeThoughts: true,\n\t\t};\n\t\t// Gemini 3 models use thinkingLevel, older models use thinkingBudget\n\t\tif (options.thinking.level !== undefined) {\n\t\t\t// Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values\n\t\t\tgenerationConfig.thinkingConfig.thinkingLevel = options.thinking.level as any;\n\t\t} else if (options.thinking.budgetTokens !== undefined) {\n\t\t\tgenerationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;\n\t\t}\n\t}\n\n\tconst request: CloudCodeAssistRequest[\"request\"] = {\n\t\tcontents,\n\t};\n\n\trequest.sessionId = options.sessionId;\n\n\t// System instruction must be object with parts, not plain string\n\tif (context.systemPrompt) {\n\t\trequest.systemInstruction = {\n\t\t\tparts: [{ text: sanitizeSurrogates(context.systemPrompt) }],\n\t\t};\n\t}\n\n\tif (Object.keys(generationConfig).length > 0) {\n\t\trequest.generationConfig = generationConfig;\n\t}\n\n\tif (context.tools && context.tools.length > 0) {\n\t\t// Claude models on Cloud Code Assist need the legacy `parameters` field;\n\t\t// the API translates it into Anthropic's `input_schema`.\n\t\tconst useParameters = model.id.startsWith(\"claude-\");\n\t\trequest.tools = convertTools(context.tools, useParameters);\n\t\tif (options.toolChoice) {\n\t\t\trequest.toolConfig = {\n\t\t\t\tfunctionCallingConfig: {\n\t\t\t\t\tmode: mapToolChoice(options.toolChoice),\n\t\t\t\t},\n\t\t\t};\n\t\t}\n\t}\n\n\tif (isAntigravity) {\n\t\tconst existingParts = request.systemInstruction?.parts ?? [];\n\t\trequest.systemInstruction = {\n\t\t\trole: \"user\",\n\t\t\tparts: [\n\t\t\t\t{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION },\n\t\t\t\t{ text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },\n\t\t\t\t...existingParts,\n\t\t\t],\n\t\t};\n\t}\n\n\treturn {\n\t\tproject: projectId,\n\t\tmodel: model.id,\n\t\trequest,\n\t\t...(isAntigravity ? { requestType: \"agent\" } : {}),\n\t\tuserAgent: isAntigravity ? \"antigravity\" : \"pi-coding-agent\",\n\t\trequestId: `${isAntigravity ? \"agent\" : \"draht\"}-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,\n\t};\n}\n\ntype ClampedThinkingLevel = Exclude<ThinkingLevel, \"xhigh\">;\n\nfunction getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {\n\tif (isGemini3ProModel(modelId)) {\n\t\tswitch (effort) {\n\t\t\tcase \"minimal\":\n\t\t\tcase \"low\":\n\t\t\t\treturn \"LOW\";\n\t\t\tcase \"medium\":\n\t\t\tcase \"high\":\n\t\t\t\treturn \"HIGH\";\n\t\t}\n\t}\n\tswitch (effort) {\n\t\tcase \"minimal\":\n\t\t\treturn \"MINIMAL\";\n\t\tcase \"low\":\n\t\t\treturn \"LOW\";\n\t\tcase \"medium\":\n\t\t\treturn \"MEDIUM\";\n\t\tcase \"high\":\n\t\t\treturn \"HIGH\";\n\t}\n}\n"]}
1
+ {"version":3,"file":"google-gemini-cli.d.ts","sourceRoot":"","sources":["../../src/providers/google-gemini-cli.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAE7D,OAAO,KAAK,EAGX,OAAO,EACP,KAAK,EACL,mBAAmB,EACnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AAGrB,OAAO,EAEN,YAAY,EAGZ,aAAa,EAEb,MAAM,oBAAoB,CAAC;AAG5B;;;GAGG;AACH,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,GAAG,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEvG,MAAM,WAAW,sBAAuB,SAAQ,aAAa;IAC5D,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,CAAC;IACrC;;;;;;OAMG;IACH,QAAQ,CAAC,EAAE;QACV,OAAO,EAAE,OAAO,CAAC;QACjB,4DAA4D;QAC5D,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,qGAAqG;QACrG,KAAK,CAAC,EAAE,mBAAmB,CAAC;KAC5B,CAAC;IACF,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAgDD;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,QAAQ,GAAG,OAAO,GAAG,MAAM,GAAG,SAAS,CAyFtG;AA6DD,UAAU,sBAAsB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE;QACR,QAAQ,EAAE,OAAO,EAAE,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,iBAAiB,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE;gBAAE,IAAI,EAAE,MAAM,CAAA;aAAE,EAAE,CAAA;SAAE,CAAC;QACjE,gBAAgB,CAAC,EAAE;YAClB,eAAe,CAAC,EAAE,MAAM,CAAC;YACzB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,cAAc,CAAC,EAAE,cAAc,CAAC;SAChC,CAAC;QACF,KAAK,CAAC,EAAE,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC;QACxC,UAAU,CAAC,EAAE;YACZ,qBAAqB,EAAE;gBACtB,IAAI,EAAE,UAAU,CAAC,OAAO,aAAa,CAAC,CAAC;aACvC,CAAC;SACF,CAAC;KACF,CAAC;IACF,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAiCD,eAAO,MAAM,qBAAqB,EAAE,cAAc,CAAC,mBAAmB,EAAE,sBAAsB,CAme7F,CAAC;AAEF,eAAO,MAAM,2BAA2B,EAAE,cAAc,CAAC,mBAAmB,EAAE,mBAAmB,CAqDhG,CAAC;AAEF,wBAAgB,YAAY,CAC3B,KAAK,EAAE,KAAK,CAAC,mBAAmB,CAAC,EACjC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE,sBAA2B,EACpC,aAAa,UAAQ,GACnB,sBAAsB,CA4ExB","sourcesContent":["/**\n * Google Gemini CLI / Antigravity provider.\n * Shared implementation for both google-gemini-cli and google-antigravity providers.\n * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.\n */\n\nimport type { Content, ThinkingConfig } from \"@google/genai\";\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tContext,\n\tModel,\n\tSimpleStreamOptions,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingBudgets,\n\tThinkingContent,\n\tThinkingLevel,\n\tToolCall,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport {\n\tconvertMessages,\n\tconvertTools,\n\tisThinkingPart,\n\tmapStopReasonString,\n\tmapToolChoice,\n\tretainThoughtSignature,\n} from \"./google-shared.js\";\nimport { buildBaseOptions, clampReasoning } from \"./simple-options.js\";\n\n/**\n * Thinking level for Gemini 3 models.\n * Mirrors Google's ThinkingLevel enum values.\n */\nexport type GoogleThinkingLevel = \"THINKING_LEVEL_UNSPECIFIED\" | \"MINIMAL\" | \"LOW\" | \"MEDIUM\" | \"HIGH\";\n\nexport interface GoogleGeminiCliOptions extends StreamOptions {\n\ttoolChoice?: \"auto\" | \"none\" | \"any\";\n\t/**\n\t * Thinking/reasoning configuration.\n\t * - Gemini 2.x models: use `budgetTokens` to set the thinking budget\n\t * - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead\n\t *\n\t * When using `streamSimple`, this is handled automatically based on the model.\n\t */\n\tthinking?: {\n\t\tenabled: boolean;\n\t\t/** Thinking budget in tokens. Use for Gemini 2.x models. */\n\t\tbudgetTokens?: number;\n\t\t/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */\n\t\tlevel?: GoogleThinkingLevel;\n\t};\n\tprojectId?: string;\n}\n\nconst DEFAULT_ENDPOINT = \"https://cloudcode-pa.googleapis.com\";\nconst ANTIGRAVITY_DAILY_ENDPOINT = \"https://daily-cloudcode-pa.sandbox.googleapis.com\";\nconst ANTIGRAVITY_AUTOPUSH_ENDPOINT = \"https://autopush-cloudcode-pa.sandbox.googleapis.com\";\nconst ANTIGRAVITY_ENDPOINT_FALLBACKS = [\n\tANTIGRAVITY_DAILY_ENDPOINT,\n\tANTIGRAVITY_AUTOPUSH_ENDPOINT,\n\tDEFAULT_ENDPOINT,\n] as const;\n// Headers for Gemini CLI (prod endpoint)\nconst GEMINI_CLI_HEADERS = {\n\t\"User-Agent\": \"google-cloud-sdk vscode_cloudshelleditor/0.1\",\n\t\"X-Goog-Api-Client\": \"gl-node/22.17.0\",\n\t\"Client-Metadata\": JSON.stringify({\n\t\tideType: \"IDE_UNSPECIFIED\",\n\t\tplatform: \"PLATFORM_UNSPECIFIED\",\n\t\tpluginType: \"GEMINI\",\n\t}),\n};\n\n// Headers for Antigravity (sandbox endpoint) - requires specific User-Agent\nconst DEFAULT_ANTIGRAVITY_VERSION = \"1.18.4\";\n\nfunction getAntigravityHeaders() {\n\tconst version = process.env.DRAHT_AI_ANTIGRAVITY_VERSION || DEFAULT_ANTIGRAVITY_VERSION;\n\treturn {\n\t\t\"User-Agent\": `antigravity/${version} darwin/arm64`,\n\t};\n}\n\n// Antigravity system instruction (compact version from CLIProxyAPI).\nconst ANTIGRAVITY_SYSTEM_INSTRUCTION =\n\t\"You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.\" +\n\t\"You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.\" +\n\t\"**Absolute paths only**\" +\n\t\"**Proactiveness**\";\n\n// Counter for generating unique tool call IDs\nlet toolCallCounter = 0;\n\n// Retry configuration\nconst MAX_RETRIES = 3;\nconst BASE_DELAY_MS = 1000;\nconst MAX_EMPTY_STREAM_RETRIES = 2;\nconst EMPTY_STREAM_BASE_DELAY_MS = 500;\nconst CLAUDE_THINKING_BETA_HEADER = \"interleaved-thinking-2025-05-14\";\n\n/**\n * Extract retry delay from Gemini error response (in milliseconds).\n * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),\n * then parses body patterns like:\n * - \"Your quota will reset after 39s\"\n * - \"Your quota will reset after 18h31m10s\"\n * - \"Please retry in Xs\" or \"Please retry in Xms\"\n * - \"retryDelay\": \"34.074824224s\" (JSON field)\n */\nexport function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {\n\tconst normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);\n\n\tconst headers = response instanceof Headers ? response : response?.headers;\n\tif (headers) {\n\t\tconst retryAfter = headers.get(\"retry-after\");\n\t\tif (retryAfter) {\n\t\t\tconst retryAfterSeconds = Number(retryAfter);\n\t\t\tif (Number.isFinite(retryAfterSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(retryAfterSeconds * 1000);\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t\tconst retryAfterDate = new Date(retryAfter);\n\t\t\tconst retryAfterMs = retryAfterDate.getTime();\n\t\t\tif (!Number.isNaN(retryAfterMs)) {\n\t\t\t\tconst delay = normalizeDelay(retryAfterMs - Date.now());\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tconst rateLimitReset = headers.get(\"x-ratelimit-reset\");\n\t\tif (rateLimitReset) {\n\t\t\tconst resetSeconds = Number.parseInt(rateLimitReset, 10);\n\t\t\tif (!Number.isNaN(resetSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(resetSeconds * 1000 - Date.now());\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tconst rateLimitResetAfter = headers.get(\"x-ratelimit-reset-after\");\n\t\tif (rateLimitResetAfter) {\n\t\t\tconst resetAfterSeconds = Number(rateLimitResetAfter);\n\t\t\tif (Number.isFinite(resetAfterSeconds)) {\n\t\t\t\tconst delay = normalizeDelay(resetAfterSeconds * 1000);\n\t\t\t\tif (delay !== undefined) {\n\t\t\t\t\treturn delay;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 1: \"Your quota will reset after ...\" (formats: \"18h31m10s\", \"10m15s\", \"6s\", \"39s\")\n\tconst durationMatch = errorText.match(/reset after (?:(\\d+)h)?(?:(\\d+)m)?(\\d+(?:\\.\\d+)?)s/i);\n\tif (durationMatch) {\n\t\tconst hours = durationMatch[1] ? parseInt(durationMatch[1], 10) : 0;\n\t\tconst minutes = durationMatch[2] ? parseInt(durationMatch[2], 10) : 0;\n\t\tconst seconds = parseFloat(durationMatch[3]);\n\t\tif (!Number.isNaN(seconds)) {\n\t\t\tconst totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;\n\t\t\tconst delay = normalizeDelay(totalMs);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 2: \"Please retry in X[ms|s]\"\n\tconst retryInMatch = errorText.match(/Please retry in ([0-9.]+)(ms|s)/i);\n\tif (retryInMatch?.[1]) {\n\t\tconst value = parseFloat(retryInMatch[1]);\n\t\tif (!Number.isNaN(value) && value > 0) {\n\t\t\tconst ms = retryInMatch[2].toLowerCase() === \"ms\" ? value : value * 1000;\n\t\t\tconst delay = normalizeDelay(ms);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Pattern 3: \"retryDelay\": \"34.074824224s\" (JSON field in error details)\n\tconst retryDelayMatch = errorText.match(/\"retryDelay\":\\s*\"([0-9.]+)(ms|s)\"/i);\n\tif (retryDelayMatch?.[1]) {\n\t\tconst value = parseFloat(retryDelayMatch[1]);\n\t\tif (!Number.isNaN(value) && value > 0) {\n\t\t\tconst ms = retryDelayMatch[2].toLowerCase() === \"ms\" ? value : value * 1000;\n\t\t\tconst delay = normalizeDelay(ms);\n\t\t\tif (delay !== undefined) {\n\t\t\t\treturn delay;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn undefined;\n}\n\nfunction needsClaudeThinkingBetaHeader(model: Model<\"google-gemini-cli\">): boolean {\n\treturn model.provider === \"google-antigravity\" && model.id.startsWith(\"claude-\") && model.reasoning;\n}\n\nfunction isGemini3ProModel(modelId: string): boolean {\n\treturn /gemini-3(?:\\.1)?-pro/.test(modelId.toLowerCase());\n}\n\nfunction isGemini3FlashModel(modelId: string): boolean {\n\treturn /gemini-3(?:\\.1)?-flash/.test(modelId.toLowerCase());\n}\n\nfunction isGemini3Model(modelId: string): boolean {\n\treturn isGemini3ProModel(modelId) || isGemini3FlashModel(modelId);\n}\n\n/**\n * Check if an error is retryable (rate limit, server error, network error, etc.)\n */\nfunction isRetryableError(status: number, errorText: string): boolean {\n\tif (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {\n\t\treturn true;\n\t}\n\treturn /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);\n}\n\n/**\n * Extract a clean, user-friendly error message from Google API error response.\n * Parses JSON error responses and returns just the message field.\n */\nfunction extractErrorMessage(errorText: string): string {\n\ttry {\n\t\tconst parsed = JSON.parse(errorText) as { error?: { message?: string } };\n\t\tif (parsed.error?.message) {\n\t\t\treturn parsed.error.message;\n\t\t}\n\t} catch {\n\t\t// Not JSON, return as-is\n\t}\n\treturn errorText;\n}\n\n/**\n * Sleep for a given number of milliseconds, respecting abort signal.\n */\nfunction sleep(ms: number, signal?: AbortSignal): Promise<void> {\n\treturn new Promise((resolve, reject) => {\n\t\tif (signal?.aborted) {\n\t\t\treject(new Error(\"Request was aborted\"));\n\t\t\treturn;\n\t\t}\n\t\tconst timeout = setTimeout(resolve, ms);\n\t\tsignal?.addEventListener(\"abort\", () => {\n\t\t\tclearTimeout(timeout);\n\t\t\treject(new Error(\"Request was aborted\"));\n\t\t});\n\t});\n}\n\ninterface CloudCodeAssistRequest {\n\tproject: string;\n\tmodel: string;\n\trequest: {\n\t\tcontents: Content[];\n\t\tsessionId?: string;\n\t\tsystemInstruction?: { role?: string; parts: { text: string }[] };\n\t\tgenerationConfig?: {\n\t\t\tmaxOutputTokens?: number;\n\t\t\ttemperature?: number;\n\t\t\tthinkingConfig?: ThinkingConfig;\n\t\t};\n\t\ttools?: ReturnType<typeof convertTools>;\n\t\ttoolConfig?: {\n\t\t\tfunctionCallingConfig: {\n\t\t\t\tmode: ReturnType<typeof mapToolChoice>;\n\t\t\t};\n\t\t};\n\t};\n\trequestType?: string;\n\tuserAgent?: string;\n\trequestId?: string;\n}\n\ninterface CloudCodeAssistResponseChunk {\n\tresponse?: {\n\t\tcandidates?: Array<{\n\t\t\tcontent?: {\n\t\t\t\trole: string;\n\t\t\t\tparts?: Array<{\n\t\t\t\t\ttext?: string;\n\t\t\t\t\tthought?: boolean;\n\t\t\t\t\tthoughtSignature?: string;\n\t\t\t\t\tfunctionCall?: {\n\t\t\t\t\t\tname: string;\n\t\t\t\t\t\targs: Record<string, unknown>;\n\t\t\t\t\t\tid?: string;\n\t\t\t\t\t};\n\t\t\t\t}>;\n\t\t\t};\n\t\t\tfinishReason?: string;\n\t\t}>;\n\t\tusageMetadata?: {\n\t\t\tpromptTokenCount?: number;\n\t\t\tcandidatesTokenCount?: number;\n\t\t\tthoughtsTokenCount?: number;\n\t\t\ttotalTokenCount?: number;\n\t\t\tcachedContentTokenCount?: number;\n\t\t};\n\t\tmodelVersion?: string;\n\t\tresponseId?: string;\n\t};\n\ttraceId?: string;\n}\n\nexport const streamGoogleGeminiCli: StreamFunction<\"google-gemini-cli\", GoogleGeminiCliOptions> = (\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\toptions?: GoogleGeminiCliOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: \"google-gemini-cli\" as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\t// apiKey is JSON-encoded: { token, projectId }\n\t\t\tconst apiKeyRaw = options?.apiKey;\n\t\t\tif (!apiKeyRaw) {\n\t\t\t\tthrow new Error(\"Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.\");\n\t\t\t}\n\n\t\t\tlet accessToken: string;\n\t\t\tlet projectId: string;\n\n\t\t\ttry {\n\t\t\t\tconst parsed = JSON.parse(apiKeyRaw) as { token: string; projectId: string };\n\t\t\t\taccessToken = parsed.token;\n\t\t\t\tprojectId = parsed.projectId;\n\t\t\t} catch {\n\t\t\t\tthrow new Error(\"Invalid Google Cloud Code Assist credentials. Use /login to re-authenticate.\");\n\t\t\t}\n\n\t\t\tif (!accessToken || !projectId) {\n\t\t\t\tthrow new Error(\"Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.\");\n\t\t\t}\n\n\t\t\tconst isAntigravity = model.provider === \"google-antigravity\";\n\t\t\tconst baseUrl = model.baseUrl?.trim();\n\t\t\tconst endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];\n\n\t\t\tlet requestBody = buildRequest(model, context, projectId, options, isAntigravity);\n\t\t\tconst nextRequestBody = await options?.onPayload?.(requestBody, model);\n\t\t\tif (nextRequestBody !== undefined) {\n\t\t\t\trequestBody = nextRequestBody as CloudCodeAssistRequest;\n\t\t\t}\n\t\t\tconst headers = isAntigravity ? getAntigravityHeaders() : GEMINI_CLI_HEADERS;\n\n\t\t\tconst requestHeaders = {\n\t\t\t\tAuthorization: `Bearer ${accessToken}`,\n\t\t\t\t\"Content-Type\": \"application/json\",\n\t\t\t\tAccept: \"text/event-stream\",\n\t\t\t\t...headers,\n\t\t\t\t...(needsClaudeThinkingBetaHeader(model) ? { \"anthropic-beta\": CLAUDE_THINKING_BETA_HEADER } : {}),\n\t\t\t\t...options?.headers,\n\t\t\t};\n\t\t\tconst requestBodyJson = JSON.stringify(requestBody);\n\n\t\t\t// Fetch with retry logic for rate limits, transient errors, and endpoint fallbacks.\n\t\t\t// On 403/404, immediately try the next endpoint (no delay).\n\t\t\t// On 429/5xx, retry with backoff on the same or next endpoint.\n\t\t\tlet response: Response | undefined;\n\t\t\tlet lastError: Error | undefined;\n\t\t\tlet requestUrl: string | undefined;\n\t\t\tlet endpointIndex = 0;\n\n\t\t\tfor (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {\n\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t}\n\n\t\t\t\ttry {\n\t\t\t\t\tconst endpoint = endpoints[endpointIndex];\n\t\t\t\t\trequestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;\n\t\t\t\t\tresponse = await fetch(requestUrl, {\n\t\t\t\t\t\tmethod: \"POST\",\n\t\t\t\t\t\theaders: requestHeaders,\n\t\t\t\t\t\tbody: requestBodyJson,\n\t\t\t\t\t\tsignal: options?.signal,\n\t\t\t\t\t});\n\n\t\t\t\t\tif (response.ok) {\n\t\t\t\t\t\tbreak; // Success, exit retry loop\n\t\t\t\t\t}\n\n\t\t\t\t\tconst errorText = await response.text();\n\n\t\t\t\t\t// On 403/404, cascade to the next endpoint immediately (no delay)\n\t\t\t\t\tif ((response.status === 403 || response.status === 404) && endpointIndex < endpoints.length - 1) {\n\t\t\t\t\t\tendpointIndex++;\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Check if retryable (429, 5xx, network patterns)\n\t\t\t\t\tif (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {\n\t\t\t\t\t\t// Advance endpoint if possible\n\t\t\t\t\t\tif (endpointIndex < endpoints.length - 1) {\n\t\t\t\t\t\t\tendpointIndex++;\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\t// Use server-provided delay or exponential backoff\n\t\t\t\t\t\tconst serverDelay = extractRetryDelay(errorText, response);\n\t\t\t\t\t\tconst delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;\n\n\t\t\t\t\t\t// Check if server delay exceeds max allowed (default: 60s)\n\t\t\t\t\t\tconst maxDelayMs = options?.maxRetryDelayMs ?? 60000;\n\t\t\t\t\t\tif (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {\n\t\t\t\t\t\t\tconst delaySeconds = Math.ceil(serverDelay / 1000);\n\t\t\t\t\t\t\tthrow new Error(\n\t\t\t\t\t\t\t\t`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,\n\t\t\t\t\t\t\t);\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tawait sleep(delayMs, options?.signal);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Not retryable or max retries exceeded\n\t\t\t\t\tthrow new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);\n\t\t\t\t} catch (error) {\n\t\t\t\t\t// Check for abort - fetch throws AbortError, our code throws \"Request was aborted\"\n\t\t\t\t\tif (error instanceof Error) {\n\t\t\t\t\t\tif (error.name === \"AbortError\" || error.message === \"Request was aborted\") {\n\t\t\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t\t// Extract detailed error message from fetch errors (Node includes cause)\n\t\t\t\t\tlastError = error instanceof Error ? error : new Error(String(error));\n\t\t\t\t\tif (lastError.message === \"fetch failed\" && lastError.cause instanceof Error) {\n\t\t\t\t\t\tlastError = new Error(`Network error: ${lastError.cause.message}`);\n\t\t\t\t\t}\n\t\t\t\t\t// Network errors are retryable\n\t\t\t\t\tif (attempt < MAX_RETRIES) {\n\t\t\t\t\t\tconst delayMs = BASE_DELAY_MS * 2 ** attempt;\n\t\t\t\t\t\tawait sleep(delayMs, options?.signal);\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t\tthrow lastError;\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (!response || !response.ok) {\n\t\t\t\tthrow lastError ?? new Error(\"Failed to get response after retries\");\n\t\t\t}\n\n\t\t\tlet started = false;\n\t\t\tconst ensureStarted = () => {\n\t\t\t\tif (!started) {\n\t\t\t\t\tstream.push({ type: \"start\", partial: output });\n\t\t\t\t\tstarted = true;\n\t\t\t\t}\n\t\t\t};\n\n\t\t\tconst resetOutput = () => {\n\t\t\t\toutput.content = [];\n\t\t\t\toutput.usage = {\n\t\t\t\t\tinput: 0,\n\t\t\t\t\toutput: 0,\n\t\t\t\t\tcacheRead: 0,\n\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\ttotalTokens: 0,\n\t\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t\t};\n\t\t\t\toutput.stopReason = \"stop\";\n\t\t\t\toutput.errorMessage = undefined;\n\t\t\t\toutput.timestamp = Date.now();\n\t\t\t\tstarted = false;\n\t\t\t};\n\n\t\t\tconst streamResponse = async (activeResponse: Response): Promise<boolean> => {\n\t\t\t\tif (!activeResponse.body) {\n\t\t\t\t\tthrow new Error(\"No response body\");\n\t\t\t\t}\n\n\t\t\t\tlet hasContent = false;\n\t\t\t\tlet currentBlock: TextContent | ThinkingContent | null = null;\n\t\t\t\tconst blocks = output.content;\n\t\t\t\tconst blockIndex = () => blocks.length - 1;\n\n\t\t\t\t// Read SSE stream\n\t\t\t\tconst reader = activeResponse.body.getReader();\n\t\t\t\tconst decoder = new TextDecoder();\n\t\t\t\tlet buffer = \"\";\n\n\t\t\t\t// Set up abort handler to cancel reader when signal fires\n\t\t\t\tconst abortHandler = () => {\n\t\t\t\t\tvoid reader.cancel().catch(() => {});\n\t\t\t\t};\n\t\t\t\toptions?.signal?.addEventListener(\"abort\", abortHandler);\n\n\t\t\t\ttry {\n\t\t\t\t\twhile (true) {\n\t\t\t\t\t\t// Check abort signal before each read\n\t\t\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t\t\t}\n\n\t\t\t\t\t\tconst { done, value } = await reader.read();\n\t\t\t\t\t\tif (done) break;\n\n\t\t\t\t\t\tbuffer += decoder.decode(value, { stream: true });\n\t\t\t\t\t\tconst lines = buffer.split(\"\\n\");\n\t\t\t\t\t\tbuffer = lines.pop() || \"\";\n\n\t\t\t\t\t\tfor (const line of lines) {\n\t\t\t\t\t\t\tif (!line.startsWith(\"data:\")) continue;\n\n\t\t\t\t\t\t\tconst jsonStr = line.slice(5).trim();\n\t\t\t\t\t\t\tif (!jsonStr) continue;\n\n\t\t\t\t\t\t\tlet chunk: CloudCodeAssistResponseChunk;\n\t\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\t\tchunk = JSON.parse(jsonStr);\n\t\t\t\t\t\t\t} catch {\n\t\t\t\t\t\t\t\tcontinue;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t// Unwrap the response\n\t\t\t\t\t\t\tconst responseData = chunk.response;\n\t\t\t\t\t\t\tif (!responseData) continue;\n\n\t\t\t\t\t\t\tconst candidate = responseData.candidates?.[0];\n\t\t\t\t\t\t\tif (candidate?.content?.parts) {\n\t\t\t\t\t\t\t\tfor (const part of candidate.content.parts) {\n\t\t\t\t\t\t\t\t\tif (part.text !== undefined) {\n\t\t\t\t\t\t\t\t\t\thasContent = true;\n\t\t\t\t\t\t\t\t\t\tconst isThinking = isThinkingPart(part);\n\t\t\t\t\t\t\t\t\t\tif (\n\t\t\t\t\t\t\t\t\t\t\t!currentBlock ||\n\t\t\t\t\t\t\t\t\t\t\t(isThinking && currentBlock.type !== \"thinking\") ||\n\t\t\t\t\t\t\t\t\t\t\t(!isThinking && currentBlock.type !== \"text\")\n\t\t\t\t\t\t\t\t\t\t) {\n\t\t\t\t\t\t\t\t\t\t\tif (currentBlock) {\n\t\t\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blocks.length - 1,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\tif (isThinking) {\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = { type: \"thinking\", thinking: \"\", thinkingSignature: undefined };\n\t\t\t\t\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_start\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = { type: \"text\", text: \"\" };\n\t\t\t\t\t\t\t\t\t\t\t\toutput.content.push(currentBlock);\n\t\t\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"thinking\") {\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinking += part.text;\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinkingSignature = retainThoughtSignature(\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.thinkingSignature,\n\t\t\t\t\t\t\t\t\t\t\t\tpart.thoughtSignature,\n\t\t\t\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\tdelta: part.text,\n\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.text += part.text;\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.textSignature = retainThoughtSignature(\n\t\t\t\t\t\t\t\t\t\t\t\tcurrentBlock.textSignature,\n\t\t\t\t\t\t\t\t\t\t\t\tpart.thoughtSignature,\n\t\t\t\t\t\t\t\t\t\t\t);\n\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\tdelta: part.text,\n\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t\t\tif (part.functionCall) {\n\t\t\t\t\t\t\t\t\t\thasContent = true;\n\t\t\t\t\t\t\t\t\t\tif (currentBlock) {\n\t\t\t\t\t\t\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t\t\t\tcurrentBlock = null;\n\t\t\t\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\t\t\t\tconst providedId = part.functionCall.id;\n\t\t\t\t\t\t\t\t\t\tconst needsNewId =\n\t\t\t\t\t\t\t\t\t\t\t!providedId ||\n\t\t\t\t\t\t\t\t\t\t\toutput.content.some((b) => b.type === \"toolCall\" && b.id === providedId);\n\t\t\t\t\t\t\t\t\t\tconst toolCallId = needsNewId\n\t\t\t\t\t\t\t\t\t\t\t? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`\n\t\t\t\t\t\t\t\t\t\t\t: providedId;\n\n\t\t\t\t\t\t\t\t\t\tconst toolCall: ToolCall = {\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\t\t\t\t\t\tid: toolCallId,\n\t\t\t\t\t\t\t\t\t\t\tname: part.functionCall.name || \"\",\n\t\t\t\t\t\t\t\t\t\t\targuments: (part.functionCall.args as Record<string, unknown>) ?? {},\n\t\t\t\t\t\t\t\t\t\t\t...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),\n\t\t\t\t\t\t\t\t\t\t};\n\n\t\t\t\t\t\t\t\t\t\toutput.content.push(toolCall);\n\t\t\t\t\t\t\t\t\t\tensureStarted();\n\t\t\t\t\t\t\t\t\t\tstream.push({ type: \"toolcall_start\", contentIndex: blockIndex(), partial: output });\n\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\tdelta: JSON.stringify(toolCall.arguments),\n\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\t\t\t\t\ttoolCall,\n\t\t\t\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t\t\t\t});\n\t\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (candidate?.finishReason) {\n\t\t\t\t\t\t\t\toutput.stopReason = mapStopReasonString(candidate.finishReason);\n\t\t\t\t\t\t\t\tif (output.content.some((b) => b.type === \"toolCall\")) {\n\t\t\t\t\t\t\t\t\toutput.stopReason = \"toolUse\";\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tif (responseData.usageMetadata) {\n\t\t\t\t\t\t\t\t// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input\n\t\t\t\t\t\t\t\tconst promptTokens = responseData.usageMetadata.promptTokenCount || 0;\n\t\t\t\t\t\t\t\tconst cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;\n\t\t\t\t\t\t\t\toutput.usage = {\n\t\t\t\t\t\t\t\t\tinput: promptTokens - cacheReadTokens,\n\t\t\t\t\t\t\t\t\toutput:\n\t\t\t\t\t\t\t\t\t\t(responseData.usageMetadata.candidatesTokenCount || 0) +\n\t\t\t\t\t\t\t\t\t\t(responseData.usageMetadata.thoughtsTokenCount || 0),\n\t\t\t\t\t\t\t\t\tcacheRead: cacheReadTokens,\n\t\t\t\t\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\t\t\t\t\ttotalTokens: responseData.usageMetadata.totalTokenCount || 0,\n\t\t\t\t\t\t\t\t\tcost: {\n\t\t\t\t\t\t\t\t\t\tinput: 0,\n\t\t\t\t\t\t\t\t\t\toutput: 0,\n\t\t\t\t\t\t\t\t\t\tcacheRead: 0,\n\t\t\t\t\t\t\t\t\t\tcacheWrite: 0,\n\t\t\t\t\t\t\t\t\t\ttotal: 0,\n\t\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t\t};\n\t\t\t\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} finally {\n\t\t\t\t\toptions?.signal?.removeEventListener(\"abort\", abortHandler);\n\t\t\t\t}\n\n\t\t\t\tif (currentBlock) {\n\t\t\t\t\tif (currentBlock.type === \"text\") {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: currentBlock.text,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t} else {\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\tcontentIndex: blockIndex(),\n\t\t\t\t\t\t\tcontent: currentBlock.thinking,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\treturn hasContent;\n\t\t\t};\n\n\t\t\tlet receivedContent = false;\n\t\t\tlet currentResponse = response;\n\n\t\t\tfor (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {\n\t\t\t\tif (options?.signal?.aborted) {\n\t\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t\t}\n\n\t\t\t\tif (emptyAttempt > 0) {\n\t\t\t\t\tconst backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);\n\t\t\t\t\tawait sleep(backoffMs, options?.signal);\n\n\t\t\t\t\tif (!requestUrl) {\n\t\t\t\t\t\tthrow new Error(\"Missing request URL\");\n\t\t\t\t\t}\n\n\t\t\t\t\tcurrentResponse = await fetch(requestUrl, {\n\t\t\t\t\t\tmethod: \"POST\",\n\t\t\t\t\t\theaders: requestHeaders,\n\t\t\t\t\t\tbody: requestBodyJson,\n\t\t\t\t\t\tsignal: options?.signal,\n\t\t\t\t\t});\n\n\t\t\t\t\tif (!currentResponse.ok) {\n\t\t\t\t\t\tconst retryErrorText = await currentResponse.text();\n\t\t\t\t\t\tthrow new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tconst streamed = await streamResponse(currentResponse);\n\t\t\t\tif (streamed) {\n\t\t\t\t\treceivedContent = true;\n\t\t\t\t\tbreak;\n\t\t\t\t}\n\n\t\t\t\tif (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {\n\t\t\t\t\tresetOutput();\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (!receivedContent) {\n\t\t\t\tthrow new Error(\"Cloud Code Assist API returned an empty response\");\n\t\t\t}\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tif (\"index\" in block) {\n\t\t\t\t\tdelete (block as { index?: number }).index;\n\t\t\t\t}\n\t\t\t}\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleGoogleGeminiCli: StreamFunction<\"google-gemini-cli\", SimpleStreamOptions> = (\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey;\n\tif (!apiKey) {\n\t\tthrow new Error(\"Google Cloud Code Assist requires OAuth authentication. Use /login to authenticate.\");\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tif (!options?.reasoning) {\n\t\treturn streamGoogleGeminiCli(model, context, {\n\t\t\t...base,\n\t\t\tthinking: { enabled: false },\n\t\t} satisfies GoogleGeminiCliOptions);\n\t}\n\n\tconst effort = clampReasoning(options.reasoning)!;\n\tif (isGemini3Model(model.id)) {\n\t\treturn streamGoogleGeminiCli(model, context, {\n\t\t\t...base,\n\t\t\tthinking: {\n\t\t\t\tenabled: true,\n\t\t\t\tlevel: getGeminiCliThinkingLevel(effort, model.id),\n\t\t\t},\n\t\t} satisfies GoogleGeminiCliOptions);\n\t}\n\n\tconst defaultBudgets: ThinkingBudgets = {\n\t\tminimal: 1024,\n\t\tlow: 2048,\n\t\tmedium: 8192,\n\t\thigh: 16384,\n\t};\n\tconst budgets = { ...defaultBudgets, ...options.thinkingBudgets };\n\n\tconst minOutputTokens = 1024;\n\tlet thinkingBudget = budgets[effort]!;\n\tconst maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);\n\n\tif (maxTokens <= thinkingBudget) {\n\t\tthinkingBudget = Math.max(0, maxTokens - minOutputTokens);\n\t}\n\n\treturn streamGoogleGeminiCli(model, context, {\n\t\t...base,\n\t\tmaxTokens,\n\t\tthinking: {\n\t\t\tenabled: true,\n\t\t\tbudgetTokens: thinkingBudget,\n\t\t},\n\t} satisfies GoogleGeminiCliOptions);\n};\n\nexport function buildRequest(\n\tmodel: Model<\"google-gemini-cli\">,\n\tcontext: Context,\n\tprojectId: string,\n\toptions: GoogleGeminiCliOptions = {},\n\tisAntigravity = false,\n): CloudCodeAssistRequest {\n\tconst contents = convertMessages(model, context);\n\n\tconst generationConfig: CloudCodeAssistRequest[\"request\"][\"generationConfig\"] = {};\n\tif (options.temperature !== undefined) {\n\t\tgenerationConfig.temperature = options.temperature;\n\t}\n\tif (options.maxTokens !== undefined) {\n\t\tgenerationConfig.maxOutputTokens = options.maxTokens;\n\t}\n\n\t// Thinking config\n\tif (options.thinking?.enabled && model.reasoning) {\n\t\tgenerationConfig.thinkingConfig = {\n\t\t\tincludeThoughts: true,\n\t\t};\n\t\t// Gemini 3 models use thinkingLevel, older models use thinkingBudget\n\t\tif (options.thinking.level !== undefined) {\n\t\t\t// Cast to any since our GoogleThinkingLevel mirrors Google's ThinkingLevel enum values\n\t\t\tgenerationConfig.thinkingConfig.thinkingLevel = options.thinking.level as any;\n\t\t} else if (options.thinking.budgetTokens !== undefined) {\n\t\t\tgenerationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;\n\t\t}\n\t}\n\n\tconst request: CloudCodeAssistRequest[\"request\"] = {\n\t\tcontents,\n\t};\n\n\trequest.sessionId = options.sessionId;\n\n\t// System instruction must be object with parts, not plain string\n\tif (context.systemPrompt) {\n\t\trequest.systemInstruction = {\n\t\t\tparts: [{ text: sanitizeSurrogates(context.systemPrompt) }],\n\t\t};\n\t}\n\n\tif (Object.keys(generationConfig).length > 0) {\n\t\trequest.generationConfig = generationConfig;\n\t}\n\n\tif (context.tools && context.tools.length > 0) {\n\t\t// Claude models on Cloud Code Assist need the legacy `parameters` field;\n\t\t// the API translates it into Anthropic's `input_schema`.\n\t\tconst useParameters = model.id.startsWith(\"claude-\");\n\t\trequest.tools = convertTools(context.tools, useParameters);\n\t\tif (options.toolChoice) {\n\t\t\trequest.toolConfig = {\n\t\t\t\tfunctionCallingConfig: {\n\t\t\t\t\tmode: mapToolChoice(options.toolChoice),\n\t\t\t\t},\n\t\t\t};\n\t\t}\n\t}\n\n\tif (isAntigravity) {\n\t\tconst existingParts = request.systemInstruction?.parts ?? [];\n\t\trequest.systemInstruction = {\n\t\t\trole: \"user\",\n\t\t\tparts: [\n\t\t\t\t{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION },\n\t\t\t\t{ text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },\n\t\t\t\t...existingParts,\n\t\t\t],\n\t\t};\n\t}\n\n\treturn {\n\t\tproject: projectId,\n\t\tmodel: model.id,\n\t\trequest,\n\t\t...(isAntigravity ? { requestType: \"agent\" } : {}),\n\t\tuserAgent: isAntigravity ? \"antigravity\" : \"pi-coding-agent\",\n\t\trequestId: `${isAntigravity ? \"agent\" : \"draht\"}-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,\n\t};\n}\n\ntype ClampedThinkingLevel = Exclude<ThinkingLevel, \"xhigh\">;\n\nfunction getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {\n\tif (isGemini3ProModel(modelId)) {\n\t\tswitch (effort) {\n\t\t\tcase \"minimal\":\n\t\t\tcase \"low\":\n\t\t\t\treturn \"LOW\";\n\t\t\tcase \"medium\":\n\t\t\tcase \"high\":\n\t\t\t\treturn \"HIGH\";\n\t\t}\n\t}\n\tswitch (effort) {\n\t\tcase \"minimal\":\n\t\t\treturn \"MINIMAL\";\n\t\tcase \"low\":\n\t\t\treturn \"LOW\";\n\t\tcase \"medium\":\n\t\t\treturn \"MEDIUM\";\n\t\tcase \"high\":\n\t\t\treturn \"HIGH\";\n\t}\n}\n"]}
@@ -10,7 +10,12 @@ import { convertMessages, convertTools, isThinkingPart, mapStopReasonString, map
10
10
  import { buildBaseOptions, clampReasoning } from "./simple-options.js";
11
11
  const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
12
12
  const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
13
- const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT];
13
+ const ANTIGRAVITY_AUTOPUSH_ENDPOINT = "https://autopush-cloudcode-pa.sandbox.googleapis.com";
14
+ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [
15
+ ANTIGRAVITY_DAILY_ENDPOINT,
16
+ ANTIGRAVITY_AUTOPUSH_ENDPOINT,
17
+ DEFAULT_ENDPOINT,
18
+ ];
14
19
  // Headers for Gemini CLI (prod endpoint)
15
20
  const GEMINI_CLI_HEADERS = {
16
21
  "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@@ -22,17 +27,11 @@ const GEMINI_CLI_HEADERS = {
22
27
  }),
23
28
  };
24
29
  // Headers for Antigravity (sandbox endpoint) - requires specific User-Agent
25
- const DEFAULT_ANTIGRAVITY_VERSION = "1.18.3";
30
+ const DEFAULT_ANTIGRAVITY_VERSION = "1.18.4";
26
31
  function getAntigravityHeaders() {
27
32
  const version = process.env.DRAHT_AI_ANTIGRAVITY_VERSION || DEFAULT_ANTIGRAVITY_VERSION;
28
33
  return {
29
34
  "User-Agent": `antigravity/${version} darwin/arm64`,
30
- "X-Goog-Api-Client": "google-cloud-sdk vscode_cloudshelleditor/0.1",
31
- "Client-Metadata": JSON.stringify({
32
- ideType: "IDE_UNSPECIFIED",
33
- platform: "PLATFORM_UNSPECIFIED",
34
- pluginType: "GEMINI",
35
- }),
36
35
  };
37
36
  }
38
37
  // Antigravity system instruction (compact version from CLIProxyAPI).
@@ -140,9 +139,8 @@ export function extractRetryDelay(errorText, response) {
140
139
  }
141
140
  return undefined;
142
141
  }
143
- function isClaudeThinkingModel(modelId) {
144
- const normalized = modelId.toLowerCase();
145
- return normalized.includes("claude") && normalized.includes("thinking");
142
+ function needsClaudeThinkingBetaHeader(model) {
143
+ return model.provider === "google-antigravity" && model.id.startsWith("claude-") && model.reasoning;
146
144
  }
147
145
  function isGemini3ProModel(modelId) {
148
146
  return /gemini-3(?:\.1)?-pro/.test(modelId.toLowerCase());
@@ -236,28 +234,34 @@ export const streamGoogleGeminiCli = (model, context, options) => {
236
234
  const isAntigravity = model.provider === "google-antigravity";
237
235
  const baseUrl = model.baseUrl?.trim();
238
236
  const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
239
- const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
240
- options?.onPayload?.(requestBody);
237
+ let requestBody = buildRequest(model, context, projectId, options, isAntigravity);
238
+ const nextRequestBody = await options?.onPayload?.(requestBody, model);
239
+ if (nextRequestBody !== undefined) {
240
+ requestBody = nextRequestBody;
241
+ }
241
242
  const headers = isAntigravity ? getAntigravityHeaders() : GEMINI_CLI_HEADERS;
242
243
  const requestHeaders = {
243
244
  Authorization: `Bearer ${accessToken}`,
244
245
  "Content-Type": "application/json",
245
246
  Accept: "text/event-stream",
246
247
  ...headers,
247
- ...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
248
+ ...(needsClaudeThinkingBetaHeader(model) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
248
249
  ...options?.headers,
249
250
  };
250
251
  const requestBodyJson = JSON.stringify(requestBody);
251
- // Fetch with retry logic for rate limits and transient errors
252
+ // Fetch with retry logic for rate limits, transient errors, and endpoint fallbacks.
253
+ // On 403/404, immediately try the next endpoint (no delay).
254
+ // On 429/5xx, retry with backoff on the same or next endpoint.
252
255
  let response;
253
256
  let lastError;
254
257
  let requestUrl;
258
+ let endpointIndex = 0;
255
259
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
256
260
  if (options?.signal?.aborted) {
257
261
  throw new Error("Request was aborted");
258
262
  }
259
263
  try {
260
- const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
264
+ const endpoint = endpoints[endpointIndex];
261
265
  requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
262
266
  response = await fetch(requestUrl, {
263
267
  method: "POST",
@@ -269,8 +273,17 @@ export const streamGoogleGeminiCli = (model, context, options) => {
269
273
  break; // Success, exit retry loop
270
274
  }
271
275
  const errorText = await response.text();
272
- // Check if retryable
276
+ // On 403/404, cascade to the next endpoint immediately (no delay)
277
+ if ((response.status === 403 || response.status === 404) && endpointIndex < endpoints.length - 1) {
278
+ endpointIndex++;
279
+ continue;
280
+ }
281
+ // Check if retryable (429, 5xx, network patterns)
273
282
  if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
283
+ // Advance endpoint if possible
284
+ if (endpointIndex < endpoints.length - 1) {
285
+ endpointIndex++;
286
+ }
274
287
  // Use server-provided delay or exponential backoff
275
288
  const serverDelay = extractRetryDelay(errorText, response);
276
289
  const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;