@zuplo/runtime 6.70.62 → 6.70.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/esm/chunk-C2TBCXWG.js +26 -0
- package/out/esm/chunk-C2TBCXWG.js.map +1 -0
- package/out/esm/chunk-GI3LNO4X.js +370 -0
- package/out/esm/chunk-GI3LNO4X.js.map +1 -0
- package/out/esm/index.js +1 -1
- package/out/esm/mcp-gateway/index.js +1 -1
- package/out/esm/mocks/index.js +1 -1
- package/out/types/index.d.ts +49 -0
- package/out/types/mcp-gateway/index.d.ts +1 -0
- package/out/types/mocks/index.d.ts +1 -0
- package/package.json +1 -1
- package/out/esm/chunk-HYUYKNAF.js +0 -370
- package/out/esm/chunk-HYUYKNAF.js.map +0 -1
- package/out/esm/chunk-LGEY3NNC.js +0 -26
- package/out/esm/chunk-LGEY3NNC.js.map +0 -1
- /package/out/esm/{chunk-HYUYKNAF.js.LEGAL.txt → chunk-GI3LNO4X.js.LEGAL.txt} +0 -0
package/out/types/index.d.ts
CHANGED
|
@@ -176,6 +176,22 @@ export declare class AIGatewayMeteringInboundPolicy extends InboundPolicy<AIGate
|
|
|
176
176
|
increments: AIGatewayMeterIncrements
|
|
177
177
|
): void;
|
|
178
178
|
static getIncrements(context: ZuploContext): AIGatewayMeterIncrements;
|
|
179
|
+
/**
|
|
180
|
+
* Record the global quota fallback models for the current request. Set by the
|
|
181
|
+
* metering policy when a quota is exceeded and a fallback is configured; read
|
|
182
|
+
* by the LLM translation handler, which routes to the capability-appropriate
|
|
183
|
+
* model instead of the (over-budget) primary.
|
|
184
|
+
*
|
|
185
|
+
* @param context - The ZuploContext
|
|
186
|
+
* @param quotaFallback - The validated quota fallback models.
|
|
187
|
+
*/
|
|
188
|
+
static setQuotaFallback(
|
|
189
|
+
context: ZuploContext,
|
|
190
|
+
quotaFallback: QuotaFallbackModels
|
|
191
|
+
): void;
|
|
192
|
+
static getQuotaFallback(
|
|
193
|
+
context: ZuploContext
|
|
194
|
+
): QuotaFallbackModels | undefined;
|
|
179
195
|
constructor(
|
|
180
196
|
options: AIGatewayMeteringInboundPolicyOptions,
|
|
181
197
|
policyName: string
|
|
@@ -185,6 +201,23 @@ export declare class AIGatewayMeteringInboundPolicy extends InboundPolicy<AIGate
|
|
|
185
201
|
context: ZuploContext
|
|
186
202
|
): Promise<Response | ZuploRequest<RequestGeneric_2>>;
|
|
187
203
|
private fetchCurrentMeters;
|
|
204
|
+
/**
|
|
205
|
+
* The capability a request targets, derived from its path, or `undefined` for
|
|
206
|
+
* paths that do not support quota fallback (e.g. `/v1/responses`, which the LLM
|
|
207
|
+
* handler serves without the fallback chain). Used to decide whether a quota
|
|
208
|
+
* fallback actually applies to *this* request.
|
|
209
|
+
*/
|
|
210
|
+
private requestCapability;
|
|
211
|
+
/**
|
|
212
|
+
* Validate and return the configured quota fallback when it applies to *this*
|
|
213
|
+
* request's capability, or `undefined` otherwise (no config, malformed config,
|
|
214
|
+
* an unsupported path, or no model configured for the request's capability).
|
|
215
|
+
* Returning `undefined` blocks the request with a 429 — so a fallback is only
|
|
216
|
+
* counted and applied when the handler can actually serve the request with it.
|
|
217
|
+
* The config is customer-authored and reaches us through an `unknown` cast, so
|
|
218
|
+
* we validate it through Zod before acting on it.
|
|
219
|
+
*/
|
|
220
|
+
private resolveQuotaFallback;
|
|
188
221
|
private checkHierarchicalQuotaLimits;
|
|
189
222
|
/**
|
|
190
223
|
* Increment meters via API. Can be used by providers for streaming responses.
|
|
@@ -2969,6 +3002,7 @@ declare const EventType: {
|
|
|
2969
3002
|
readonly AI_GATEWAY_LATENCY_HISTOGRAM: "ai_gateway_latency_histogram";
|
|
2970
3003
|
readonly AI_GATEWAY_WARNING_COUNT: "ai_gateway_warning_count";
|
|
2971
3004
|
readonly AI_GATEWAY_BLOCKED_COUNT: "ai_gateway_blocked_count";
|
|
3005
|
+
readonly AI_GATEWAY_FALLBACK_COUNT: "ai_gateway_fallback_count";
|
|
2972
3006
|
readonly MCP_REQUEST_RECEIVED: "mcp_request_received";
|
|
2973
3007
|
readonly MCP_REQUEST_COMPLETED: "mcp_request_completed";
|
|
2974
3008
|
readonly MCP_REQUEST_REJECTED: "mcp_request_rejected";
|
|
@@ -6844,6 +6878,12 @@ export declare interface MockApiInboundOptions {
|
|
|
6844
6878
|
*/
|
|
6845
6879
|
export declare const MockApiInboundPolicy: InboundPolicyHandler<MockApiInboundOptions>;
|
|
6846
6880
|
|
|
6881
|
+
declare interface ModelConfiguration {
|
|
6882
|
+
environmentVariable: string;
|
|
6883
|
+
model: string;
|
|
6884
|
+
provider: string;
|
|
6885
|
+
}
|
|
6886
|
+
|
|
6847
6887
|
declare type Modify<T, R> = Omit<T, keyof R> & R;
|
|
6848
6888
|
|
|
6849
6889
|
declare interface MoesifContext {
|
|
@@ -8667,6 +8707,15 @@ export declare interface QuotaDetail {
|
|
|
8667
8707
|
};
|
|
8668
8708
|
}
|
|
8669
8709
|
|
|
8710
|
+
/**
|
|
8711
|
+
* Global quota fallback models, keyed by capability. Applied whenever any
|
|
8712
|
+
* configured quota limit (any meter/period) is exceeded.
|
|
8713
|
+
*/
|
|
8714
|
+
declare interface QuotaFallbackModels {
|
|
8715
|
+
completions?: ModelConfiguration;
|
|
8716
|
+
embeddings?: ModelConfiguration;
|
|
8717
|
+
}
|
|
8718
|
+
|
|
8670
8719
|
/**
|
|
8671
8720
|
* The Quota policy enables you to set monthly, weekly, daily or hourly quotas on your API.
|
|
8672
8721
|
*
|
|
@@ -103,6 +103,7 @@ declare const EventType: {
|
|
|
103
103
|
readonly AI_GATEWAY_LATENCY_HISTOGRAM: "ai_gateway_latency_histogram";
|
|
104
104
|
readonly AI_GATEWAY_WARNING_COUNT: "ai_gateway_warning_count";
|
|
105
105
|
readonly AI_GATEWAY_BLOCKED_COUNT: "ai_gateway_blocked_count";
|
|
106
|
+
readonly AI_GATEWAY_FALLBACK_COUNT: "ai_gateway_fallback_count";
|
|
106
107
|
readonly MCP_REQUEST_RECEIVED: "mcp_request_received";
|
|
107
108
|
readonly MCP_REQUEST_COMPLETED: "mcp_request_completed";
|
|
108
109
|
readonly MCP_REQUEST_REJECTED: "mcp_request_rejected";
|
|
@@ -84,6 +84,7 @@ declare const EventType: {
|
|
|
84
84
|
readonly AI_GATEWAY_LATENCY_HISTOGRAM: "ai_gateway_latency_histogram";
|
|
85
85
|
readonly AI_GATEWAY_WARNING_COUNT: "ai_gateway_warning_count";
|
|
86
86
|
readonly AI_GATEWAY_BLOCKED_COUNT: "ai_gateway_blocked_count";
|
|
87
|
+
readonly AI_GATEWAY_FALLBACK_COUNT: "ai_gateway_fallback_count";
|
|
87
88
|
readonly MCP_REQUEST_RECEIVED: "mcp_request_received";
|
|
88
89
|
readonly MCP_REQUEST_COMPLETED: "mcp_request_completed";
|
|
89
90
|
readonly MCP_REQUEST_REJECTED: "mcp_request_rejected";
|