@hebo-ai/gateway 0.6.1 → 0.6.2-rc0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -569,7 +569,7 @@ Accepted request fields:
569
569
 
570
570
  - `prompt_cache_key` + `prompt_cache_retention` (OpenAI style)
571
571
  - `cache_control` (OpenRouter / Vercel / Claude style)
572
- - `cached_content` (Gemini style)
572
+ - `extra_body { google: { cached_content } }` (Gemini style)
573
573
 
574
574
  ```json
575
575
  {
@@ -6,9 +6,9 @@ import { toResponse } from "../../utils/response";
6
6
  import { parseDataUrl } from "../../utils/url";
7
7
  // --- Request Flow ---
8
8
  export function convertToTextCallOptions(params) {
9
- const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, cached_content, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
9
+ const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
10
10
  Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
11
- Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control));
11
+ Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, extra_body?.google?.cached_content, cache_control));
12
12
  const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
13
13
  return {
14
14
  messages: convertToModelMessages(messages),
@@ -658,7 +658,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
658
658
  in_memory: "in_memory";
659
659
  "24h": "24h";
660
660
  }>>;
661
- cached_content: z.ZodOptional<z.ZodString>;
661
+ extra_body: z.ZodOptional<z.ZodObject<{
662
+ google: z.ZodOptional<z.ZodObject<{
663
+ cached_content: z.ZodOptional<z.ZodString>;
664
+ }, z.core.$strip>>;
665
+ }, z.core.$strip>>;
662
666
  cache_control: z.ZodOptional<z.ZodObject<{
663
667
  type: z.ZodLiteral<"ephemeral">;
664
668
  ttl: z.ZodOptional<z.ZodString>;
@@ -866,7 +870,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
866
870
  in_memory: "in_memory";
867
871
  "24h": "24h";
868
872
  }>>;
869
- cached_content: z.ZodOptional<z.ZodString>;
873
+ extra_body: z.ZodOptional<z.ZodObject<{
874
+ google: z.ZodOptional<z.ZodObject<{
875
+ cached_content: z.ZodOptional<z.ZodString>;
876
+ }, z.core.$strip>>;
877
+ }, z.core.$strip>>;
870
878
  cache_control: z.ZodOptional<z.ZodObject<{
871
879
  type: z.ZodLiteral<"ephemeral">;
872
880
  ttl: z.ZodOptional<z.ZodString>;
@@ -213,7 +213,17 @@ const ChatCompletionsInputsSchema = z.object({
213
213
  prompt_cache_key: z.string().optional(),
214
214
  prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
215
215
  // Extension origin: Gemini explicit cache handle
216
- cached_content: z.string().optional().meta({ extension: true }),
216
+ // FUTURE: generalize extra_body handling
217
+ // https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
218
+ extra_body: z
219
+ .object({
220
+ google: z
221
+ .object({
222
+ cached_content: z.string().optional().meta({ extension: true }),
223
+ })
224
+ .optional(),
225
+ })
226
+ .optional(),
217
227
  // Extension origin: OpenRouter/Vercel/Anthropic
218
228
  cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
219
229
  // Extension origin: OpenRouter
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.6.1",
3
+ "version": "0.6.2-rc0",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
5
5
  "keywords": [
6
6
  "ai",
@@ -170,7 +170,7 @@
170
170
  "@ai-sdk/google-vertex": "^4.0.80",
171
171
  "@ai-sdk/groq": "^3.0.29",
172
172
  "@ai-sdk/openai": "^3.0.41",
173
- "@aws-sdk/credential-providers": "^3.1002.0",
173
+ "@aws-sdk/credential-providers": "^3.1003.0",
174
174
  "@langfuse/otel": "^4.6.1",
175
175
  "@mjackson/node-fetch-server": "^0.7.0",
176
176
  "@opentelemetry/api": "^1.9.0",
@@ -87,7 +87,7 @@ export function convertToTextCallOptions(params: ChatCompletionsInputs): TextCal
87
87
  reasoning,
88
88
  prompt_cache_key,
89
89
  prompt_cache_retention,
90
- cached_content,
90
+ extra_body,
91
91
  cache_control,
92
92
  frequency_penalty,
93
93
  presence_penalty,
@@ -103,7 +103,7 @@ export function convertToTextCallOptions(params: ChatCompletionsInputs): TextCal
103
103
  parsePromptCachingOptions(
104
104
  prompt_cache_key,
105
105
  prompt_cache_retention,
106
- cached_content,
106
+ extra_body?.google?.cached_content,
107
107
  cache_control,
108
108
  ),
109
109
  );
@@ -254,7 +254,17 @@ const ChatCompletionsInputsSchema = z.object({
254
254
  prompt_cache_key: z.string().optional(),
255
255
  prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
256
256
  // Extension origin: Gemini explicit cache handle
257
- cached_content: z.string().optional().meta({ extension: true }),
257
+ // FUTURE: generalize extra_body handling
258
+ // https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
259
+ extra_body: z
260
+ .object({
261
+ google: z
262
+ .object({
263
+ cached_content: z.string().optional().meta({ extension: true }),
264
+ })
265
+ .optional(),
266
+ })
267
+ .optional(),
258
268
  // Extension origin: OpenRouter/Vercel/Anthropic
259
269
  cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
260
270
  // Extension origin: OpenRouter