@zuplo/runtime 6.70.62 → 6.70.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -176,6 +176,22 @@ export declare class AIGatewayMeteringInboundPolicy extends InboundPolicy<AIGate
176
176
  increments: AIGatewayMeterIncrements
177
177
  ): void;
178
178
  static getIncrements(context: ZuploContext): AIGatewayMeterIncrements;
179
+ /**
180
+ * Record the global quota fallback models for the current request. Set by the
181
+ * metering policy when a quota is exceeded and a fallback is configured; read
182
+ * by the LLM translation handler, which routes to the capability-appropriate
183
+ * model instead of the (over-budget) primary.
184
+ *
185
+ * @param context - The ZuploContext
186
+ * @param quotaFallback - The validated quota fallback models.
187
+ */
188
+ static setQuotaFallback(
189
+ context: ZuploContext,
190
+ quotaFallback: QuotaFallbackModels
191
+ ): void;
192
+ static getQuotaFallback(
193
+ context: ZuploContext
194
+ ): QuotaFallbackModels | undefined;
179
195
  constructor(
180
196
  options: AIGatewayMeteringInboundPolicyOptions,
181
197
  policyName: string
@@ -185,6 +201,23 @@ export declare class AIGatewayMeteringInboundPolicy extends InboundPolicy<AIGate
185
201
  context: ZuploContext
186
202
  ): Promise<Response | ZuploRequest<RequestGeneric_2>>;
187
203
  private fetchCurrentMeters;
204
+ /**
205
+ * The capability a request targets, derived from its path, or `undefined` for
206
+ * paths that do not support quota fallback (e.g. `/v1/responses`, which the LLM
207
+ * handler serves without the fallback chain). Used to decide whether a quota
208
+ * fallback actually applies to *this* request.
209
+ */
210
+ private requestCapability;
211
+ /**
212
+ * Validate and return the configured quota fallback when it applies to *this*
213
+ * request's capability, or `undefined` otherwise (no config, malformed config,
214
+ * an unsupported path, or no model configured for the request's capability).
215
+ * Returning `undefined` blocks the request with a 429 — so a fallback is only
216
+ * counted and applied when the handler can actually serve the request with it.
217
+ * The config is customer-authored and reaches us through an `unknown` cast, so
218
+ * we validate it through Zod before acting on it.
219
+ */
220
+ private resolveQuotaFallback;
188
221
  private checkHierarchicalQuotaLimits;
189
222
  /**
190
223
  * Increment meters via API. Can be used by providers for streaming responses.
@@ -2969,6 +3002,7 @@ declare const EventType: {
2969
3002
  readonly AI_GATEWAY_LATENCY_HISTOGRAM: "ai_gateway_latency_histogram";
2970
3003
  readonly AI_GATEWAY_WARNING_COUNT: "ai_gateway_warning_count";
2971
3004
  readonly AI_GATEWAY_BLOCKED_COUNT: "ai_gateway_blocked_count";
3005
+ readonly AI_GATEWAY_FALLBACK_COUNT: "ai_gateway_fallback_count";
2972
3006
  readonly MCP_REQUEST_RECEIVED: "mcp_request_received";
2973
3007
  readonly MCP_REQUEST_COMPLETED: "mcp_request_completed";
2974
3008
  readonly MCP_REQUEST_REJECTED: "mcp_request_rejected";
@@ -6844,6 +6878,12 @@ export declare interface MockApiInboundOptions {
6844
6878
  */
6845
6879
  export declare const MockApiInboundPolicy: InboundPolicyHandler<MockApiInboundOptions>;
6846
6880
 
6881
+ declare interface ModelConfiguration {
6882
+ environmentVariable: string;
6883
+ model: string;
6884
+ provider: string;
6885
+ }
6886
+
6847
6887
  declare type Modify<T, R> = Omit<T, keyof R> & R;
6848
6888
 
6849
6889
  declare interface MoesifContext {
@@ -8667,6 +8707,15 @@ export declare interface QuotaDetail {
8667
8707
  };
8668
8708
  }
8669
8709
 
8710
+ /**
8711
+ * Global quota fallback models, keyed by capability. Applied whenever any
8712
+ * configured quota limit (any meter/period) is exceeded.
8713
+ */
8714
+ declare interface QuotaFallbackModels {
8715
+ completions?: ModelConfiguration;
8716
+ embeddings?: ModelConfiguration;
8717
+ }
8718
+
8670
8719
  /**
8671
8720
  * The Quota policy enables you to set monthly, weekly, daily or hourly quotas on your API.
8672
8721
  *
@@ -103,6 +103,7 @@ declare const EventType: {
103
103
  readonly AI_GATEWAY_LATENCY_HISTOGRAM: "ai_gateway_latency_histogram";
104
104
  readonly AI_GATEWAY_WARNING_COUNT: "ai_gateway_warning_count";
105
105
  readonly AI_GATEWAY_BLOCKED_COUNT: "ai_gateway_blocked_count";
106
+ readonly AI_GATEWAY_FALLBACK_COUNT: "ai_gateway_fallback_count";
106
107
  readonly MCP_REQUEST_RECEIVED: "mcp_request_received";
107
108
  readonly MCP_REQUEST_COMPLETED: "mcp_request_completed";
108
109
  readonly MCP_REQUEST_REJECTED: "mcp_request_rejected";
@@ -84,6 +84,7 @@ declare const EventType: {
84
84
  readonly AI_GATEWAY_LATENCY_HISTOGRAM: "ai_gateway_latency_histogram";
85
85
  readonly AI_GATEWAY_WARNING_COUNT: "ai_gateway_warning_count";
86
86
  readonly AI_GATEWAY_BLOCKED_COUNT: "ai_gateway_blocked_count";
87
+ readonly AI_GATEWAY_FALLBACK_COUNT: "ai_gateway_fallback_count";
87
88
  readonly MCP_REQUEST_RECEIVED: "mcp_request_received";
88
89
  readonly MCP_REQUEST_COMPLETED: "mcp_request_completed";
89
90
  readonly MCP_REQUEST_REJECTED: "mcp_request_rejected";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@zuplo/runtime",
3
3
  "type": "module",
4
- "version": "6.70.62",
4
+ "version": "6.70.63",
5
5
  "repository": "https://github.com/zuplo/zuplo",
6
6
  "author": "Zuplo, Inc.",
7
7
  "exports": {