zuplo 6.70.70 → 6.70.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/ai-gateway/getting-started.mdx +2 -1
- package/docs/ai-gateway/integrations/ai-sdk.mdx +17 -0
- package/docs/ai-gateway/introduction.mdx +5 -5
- package/docs/ai-gateway/providers.mdx +2 -0
- package/docs/analytics/access-and-entitlements.md +71 -0
- package/docs/analytics/overview.md +63 -0
- package/docs/analytics/reference/metrics-glossary.md +105 -0
- package/docs/analytics/reference/url-parameters.md +66 -0
- package/docs/analytics/shared-controls.md +121 -0
- package/docs/analytics/tabs/agents.md +88 -0
- package/docs/analytics/tabs/consumers.md +73 -0
- package/docs/analytics/tabs/graphql.md +77 -0
- package/docs/analytics/tabs/mcp.md +80 -0
- package/docs/analytics/tabs/origins.md +82 -0
- package/docs/analytics/tabs/requests.md +96 -0
- package/docs/articles/ci-cd-github/basic-deployment.mdx +10 -1
- package/docs/articles/ci-cd-github/deploy-and-test.mdx +14 -1
- package/docs/articles/ci-cd-github/local-testing.mdx +3 -1
- package/docs/articles/ci-cd-github/pr-preview-environments.mdx +36 -4
- package/docs/articles/custom-ci-cd-github.mdx +11 -2
- package/docs/articles/monetization/api-access.mdx +184 -0
- package/docs/articles/monetization/meters.mdx +4 -4
- package/docs/articles/monetization/monetization-policy.md +4 -1
- package/docs/articles/monetization/private-plans.md +3 -4
- package/docs/articles/monetization/stripe-integration.md +9 -0
- package/docs/articles/monetization/subscription-lifecycle.md +12 -11
- package/docs/articles/monorepo-deployment.mdx +20 -2
- package/docs/cli/deploy.mdx +32 -0
- package/docs/cli/deploy.partial.mdx +32 -0
- package/docs/concepts/api-keys.md +2 -2
- package/docs/dev-portal/zudoku/components/callout.mdx +11 -18
- package/docs/dev-portal/zudoku/configuration/search.md +36 -0
- package/docs/dev-portal/zudoku/configuration/site.md +38 -0
- package/docs/dev-portal/zudoku/customization/colors-theme.mdx +51 -40
- package/docs/errors/rate-limit-exceeded.mdx +30 -3
- package/docs/policies/_index.md +2 -0
- package/docs/policies/data-loss-prevention-inbound/doc.md +116 -0
- package/docs/policies/data-loss-prevention-inbound/intro.md +15 -0
- package/docs/policies/data-loss-prevention-inbound/schema.json +220 -0
- package/docs/policies/data-loss-prevention-outbound/doc.md +116 -0
- package/docs/policies/data-loss-prevention-outbound/intro.md +18 -0
- package/docs/policies/data-loss-prevention-outbound/schema.json +220 -0
- package/docs/programmable-api/background-dispatcher.mdx +6 -8
- package/docs/programmable-api/zone-cache.mdx +1 -1
- package/docs/rate-limiting/combining-policies.mdx +293 -0
- package/docs/rate-limiting/dynamic-rate-limiting.mdx +240 -0
- package/docs/rate-limiting/getting-started.mdx +339 -0
- package/docs/rate-limiting/how-it-works.md +225 -0
- package/docs/rate-limiting/monitoring-and-troubleshooting.mdx +243 -0
- package/docs/{articles → rate-limiting}/per-user-rate-limits-using-db.mdx +39 -27
- package/package.json +4 -4
- package/docs/concepts/rate-limiting.md +0 -246
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: How rate limiting works
|
|
3
|
+
sidebar_label: How it works
|
|
4
|
+
description:
|
|
5
|
+
Understand Zuplo's sliding window rate limiter — how requests are counted,
|
|
6
|
+
what each rateLimitBy mode does, the Complex Rate Limiting policy, and every
|
|
7
|
+
configuration option.
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
This page covers the mechanics behind Zuplo's rate limiter: how requests are
|
|
11
|
+
counted, what each `rateLimitBy` mode does in detail, and every configuration
|
|
12
|
+
option available. If you just want to add a rate limit to your API, start with
|
|
13
|
+
the [Getting Started guide](./getting-started.mdx) instead — this page is the
|
|
14
|
+
deep dive you can read alongside or after it.
|
|
15
|
+
|
|
16
|
+
Zuplo's rate limiter uses a **sliding window algorithm** enforced globally
|
|
17
|
+
across all edge locations. Unlike a fixed window algorithm (which resets
|
|
18
|
+
counters at fixed intervals and can allow bursts at window boundaries), the
|
|
19
|
+
sliding window continuously tracks requests over a rolling time period. This
|
|
20
|
+
produces smoother, more predictable throttling behavior.
|
|
21
|
+
|
|
22
|
+
## Key terms
|
|
23
|
+
|
|
24
|
+
A few terms show up repeatedly in the rate limiting docs. They are related but
|
|
25
|
+
not interchangeable.
|
|
26
|
+
|
|
27
|
+
- **Counter (or bucket)** — The running tally Zuplo keeps for a single caller
|
|
28
|
+
and a single policy. Each unique combination of policy `name` and caller
|
|
29
|
+
identifier gets its own counter. Two different policies tracking the same
|
|
30
|
+
caller do _not_ share a counter; two different callers under the same policy
|
|
31
|
+
do not share a counter either.
|
|
32
|
+
- **Rate limit key** — The string value that identifies a caller for bucketing.
|
|
33
|
+
For `rateLimitBy: "ip"` the key is the client's IP address; for `"user"` it is
|
|
34
|
+
`request.user.sub`; for `"function"` it is whatever your custom function
|
|
35
|
+
returns as `CustomRateLimitDetails.key`; for `"all"` there is a single
|
|
36
|
+
implicit key shared by every request to the route.
|
|
37
|
+
- **`identifier` option** — A field in the policy's configuration that points
|
|
38
|
+
Zuplo at your custom TypeScript function when `rateLimitBy` is `"function"`.
|
|
39
|
+
Zuplo calls that function on each request, and the function returns a
|
|
40
|
+
`CustomRateLimitDetails` object whose `key` property becomes the rate limit
|
|
41
|
+
key. In short: `identifier` is _where the function lives_; `key` is _what the
|
|
42
|
+
function returns_.
|
|
43
|
+
|
|
44
|
+
## How `rateLimitBy` works
|
|
45
|
+
|
|
46
|
+
The `rateLimitBy` option determines how the rate limiter groups requests into
|
|
47
|
+
buckets. Both the standard
|
|
48
|
+
[Rate Limiting policy](../policies/rate-limit-inbound.mdx) and the
|
|
49
|
+
[Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx)
|
|
50
|
+
support the same four modes.
|
|
51
|
+
|
|
52
|
+
### `ip`
|
|
53
|
+
|
|
54
|
+
Groups requests by the client's IP address. No authentication is required. This
|
|
55
|
+
is the simplest option and works well for public APIs or as a first layer of
|
|
56
|
+
protection.
|
|
57
|
+
|
|
58
|
+
:::caution
|
|
59
|
+
|
|
60
|
+
Multiple clients behind the same corporate proxy, cloud NAT, or shared Wi-Fi
|
|
61
|
+
network can share a single IP address. In these cases, IP-based rate limiting
|
|
62
|
+
can unfairly throttle unrelated users. For authenticated APIs, prefer
|
|
63
|
+
`rateLimitBy: "user"` instead.
|
|
64
|
+
|
|
65
|
+
:::
|
|
66
|
+
|
|
67
|
+
### `user`
|
|
68
|
+
|
|
69
|
+
Groups requests by the authenticated user's identity (`request.user.sub`). When
|
|
70
|
+
using [API key authentication](../articles/api-key-authentication.mdx), the
|
|
71
|
+
`sub` value is the consumer name you assigned when creating the API key. When
|
|
72
|
+
using JWT authentication, it comes from the token's `sub` claim.
|
|
73
|
+
|
|
74
|
+
This is the recommended mode for authenticated APIs because it ties limits to
|
|
75
|
+
the actual consumer rather than a shared IP address.
|
|
76
|
+
|
|
77
|
+
:::note
|
|
78
|
+
|
|
79
|
+
The `user` mode requires an authentication policy (such as API key or JWT
|
|
80
|
+
authentication) earlier in the policy pipeline. If no authenticated user is
|
|
81
|
+
present on the request, the policy returns an error. See
|
|
82
|
+
[Getting Started §5](./getting-started.mdx#5-rate-limit-authenticated-users) for
|
|
83
|
+
a full authenticated pipeline example.
|
|
84
|
+
|
|
85
|
+
:::
|
|
86
|
+
|
|
87
|
+
### `function`
|
|
88
|
+
|
|
89
|
+
Groups requests using a custom TypeScript function that you provide. The
|
|
90
|
+
function returns a `CustomRateLimitDetails` object containing a grouping key
|
|
91
|
+
and, optionally, overridden values for `requestsAllowed` and
|
|
92
|
+
`timeWindowMinutes`. See
|
|
93
|
+
[Custom rate limit functions](#custom-rate-limit-functions) below for the
|
|
94
|
+
function signature and field reference.
|
|
95
|
+
|
|
96
|
+
### `all`
|
|
97
|
+
|
|
98
|
+
Applies a single shared counter across all requests to the route, regardless of
|
|
99
|
+
who makes them. Use this for global rate limits on endpoints that call
|
|
100
|
+
resource-constrained backends.
|
|
101
|
+
|
|
102
|
+
## Custom rate limit functions
|
|
103
|
+
|
|
104
|
+
When `rateLimitBy` is set to `"function"`, Zuplo calls a TypeScript function you
|
|
105
|
+
provide on every request. The function receives the request, context, and policy
|
|
106
|
+
name, and returns a `CustomRateLimitDetails` object describing how to count that
|
|
107
|
+
request.
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
import {
|
|
111
|
+
CustomRateLimitDetails,
|
|
112
|
+
ZuploContext,
|
|
113
|
+
ZuploRequest,
|
|
114
|
+
} from "@zuplo/runtime";
|
|
115
|
+
|
|
116
|
+
export function rateLimit(
|
|
117
|
+
request: ZuploRequest,
|
|
118
|
+
context: ZuploContext,
|
|
119
|
+
policyName: string,
|
|
120
|
+
): CustomRateLimitDetails | undefined {
|
|
121
|
+
return {
|
|
122
|
+
key: request.user.sub,
|
|
123
|
+
requestsAllowed: 100,
|
|
124
|
+
timeWindowMinutes: 1,
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### `CustomRateLimitDetails`
|
|
130
|
+
|
|
131
|
+
- `key` (required) — The string used to group requests into rate limit buckets.
|
|
132
|
+
- `requestsAllowed` (optional) — Overrides the policy's `requestsAllowed` value
|
|
133
|
+
for this request.
|
|
134
|
+
- `timeWindowMinutes` (optional) — Overrides the policy's `timeWindowMinutes`
|
|
135
|
+
value for this request.
|
|
136
|
+
|
|
137
|
+
Returning `undefined` skips rate limiting for the request entirely — useful for
|
|
138
|
+
health checks or privileged callers. The function can also be `async` if you
|
|
139
|
+
need to await a database lookup or external service call.
|
|
140
|
+
|
|
141
|
+
Wire the function into the policy using the `identifier` option. The policy's
|
|
142
|
+
configured `requestsAllowed` and `timeWindowMinutes` serve as defaults; the
|
|
143
|
+
function can override them per request.
|
|
144
|
+
|
|
145
|
+
For concrete walkthroughs (tier-based, route-based, method-based,
|
|
146
|
+
database-backed, selective bypass), see
|
|
147
|
+
[Dynamic Rate Limiting](./dynamic-rate-limiting.mdx). For an advanced
|
|
148
|
+
database-backed example with caching, see
|
|
149
|
+
[Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx).
|
|
150
|
+
|
|
151
|
+
## Additional options
|
|
152
|
+
|
|
153
|
+
Both rate limiting policies support the following additional options:
|
|
154
|
+
|
|
155
|
+
- `headerMode` — Set to `"retry-after"` (default) to include the `Retry-After`
|
|
156
|
+
header in 429 responses, or `"none"` to omit it. The `Retry-After` value is
|
|
157
|
+
returned as a number of seconds (delay-seconds format).
|
|
158
|
+
- `mode` — Set to `"strict"` (default) or `"async"`. In **strict** mode, the
|
|
159
|
+
request is held until the rate limit check completes — the backend is never
|
|
160
|
+
called if the limit is exceeded. This adds some latency to every request
|
|
161
|
+
because the check hits a globally distributed rate limit service. In **async**
|
|
162
|
+
mode, the request proceeds to the backend in parallel with the rate limit
|
|
163
|
+
check. This minimizes added latency but means some requests may get through
|
|
164
|
+
even after the limit is exceeded. Async mode is a good fit when low latency
|
|
165
|
+
matters more than exact enforcement.
|
|
166
|
+
- `throwOnFailure` — Controls behavior when the rate limit service is
|
|
167
|
+
unreachable. When set to `false` (default), requests are allowed through
|
|
168
|
+
(fail-open). When set to `true`, the policy returns an error to the client.
|
|
169
|
+
The fail-open default prevents a rate limit service outage from blocking all
|
|
170
|
+
traffic to your API.
|
|
171
|
+
|
|
172
|
+
## Complex Rate Limiting policy
|
|
173
|
+
|
|
174
|
+
The [Complex Rate Limiting policy](../policies/complex-rate-limit-inbound.mdx)
|
|
175
|
+
supports **multiple named counters** in a single policy. Each counter tracks a
|
|
176
|
+
different resource or unit of work.
|
|
177
|
+
|
|
178
|
+
```json
|
|
179
|
+
{
|
|
180
|
+
"name": "my-complex-rate-limit-policy",
|
|
181
|
+
"policyType": "complex-rate-limit-inbound",
|
|
182
|
+
"handler": {
|
|
183
|
+
"export": "ComplexRateLimitInboundPolicy",
|
|
184
|
+
"module": "$import(@zuplo/runtime)",
|
|
185
|
+
"options": {
|
|
186
|
+
"rateLimitBy": "user",
|
|
187
|
+
"timeWindowMinutes": 1,
|
|
188
|
+
"limits": {
|
|
189
|
+
"requests": 100,
|
|
190
|
+
"compute": 500
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Override counter increments programmatically per request with
|
|
198
|
+
`ComplexRateLimitInboundPolicy.setIncrements()`. This suits usage-based pricing,
|
|
199
|
+
where different endpoints consume different amounts of a resource (for example,
|
|
200
|
+
counting compute units or tokens instead of raw requests).
|
|
201
|
+
|
|
202
|
+
## Related resources
|
|
203
|
+
|
|
204
|
+
**Go deeper on configuration:**
|
|
205
|
+
|
|
206
|
+
- [Rate Limiting policy reference](../policies/rate-limit-inbound.mdx) — Every
|
|
207
|
+
option for the standard policy.
|
|
208
|
+
- [Complex Rate Limiting policy reference](../policies/complex-rate-limit-inbound.mdx)
|
|
209
|
+
— Multi-counter limits for usage-based pricing (enterprise).
|
|
210
|
+
|
|
211
|
+
**Learn by example:**
|
|
212
|
+
|
|
213
|
+
- [Dynamic Rate Limiting](./dynamic-rate-limiting.mdx) — Tiered limits by
|
|
214
|
+
customer type.
|
|
215
|
+
- [Per-user rate limiting with a database](./per-user-rate-limits-using-db.mdx)
|
|
216
|
+
— Look up limits at request time using ZoneCache and a database.
|
|
217
|
+
|
|
218
|
+
**Combine with other policies:**
|
|
219
|
+
|
|
220
|
+
- [Combining Policies](./combining-policies.mdx) — Stack multiple rate limits,
|
|
221
|
+
and pair rate limiting with quotas or monetization.
|
|
222
|
+
- [Quota policy](../policies/quota-inbound.mdx) — Monthly or billing-period
|
|
223
|
+
usage caps.
|
|
224
|
+
- [Monetization policy](../articles/monetization/monetization-policy.md) —
|
|
225
|
+
Subscription-based access control and metering.
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Monitoring and troubleshooting rate limits
|
|
3
|
+
sidebar_label: Monitoring & troubleshooting
|
|
4
|
+
description:
|
|
5
|
+
Monitor rate limit events, debug unexpected 429 responses, and understand
|
|
6
|
+
failure modes.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Rate limiting only delivers value when you can observe it in action. Without
|
|
10
|
+
visibility into which consumers hit limits, how often requests are rejected, and
|
|
11
|
+
whether the rate limit service itself is healthy, you are operating blind. This
|
|
12
|
+
guide covers how to monitor rate limit activity, understand failure modes,
|
|
13
|
+
choose the right enforcement mode, and diagnose common issues.
|
|
14
|
+
|
|
15
|
+
## Monitoring rate limit events
|
|
16
|
+
|
|
17
|
+
Zuplo produces structured logs for every request, including those rejected with
|
|
18
|
+
a `429 Too Many Requests` status code. Ship these logs to an external provider
|
|
19
|
+
to build dashboards and alerts around rate limit activity.
|
|
20
|
+
|
|
21
|
+
### Setting up log shipping
|
|
22
|
+
|
|
23
|
+
Configure a [logging plugin](../articles/logging.mdx) in your `zuplo.runtime.ts`
|
|
24
|
+
file to send logs to your observability platform. Zuplo supports AWS CloudWatch,
|
|
25
|
+
Datadog, Dynatrace, Google Cloud Logging, Loki, New Relic, Splunk, Sumo Logic,
|
|
26
|
+
and VMware Log Insight. You can also build a
|
|
27
|
+
[custom logging plugin](../articles/custom-logging-example.mdx) for unsupported
|
|
28
|
+
providers.
|
|
29
|
+
|
|
30
|
+
### Filtering for rate-limited requests
|
|
31
|
+
|
|
32
|
+
Every log entry includes default fields you can filter on:
|
|
33
|
+
|
|
34
|
+
- **`requestId`** -- Correlate a specific rejected request end-to-end using the
|
|
35
|
+
`zp-rid` response header.
|
|
36
|
+
- **`environment`** and **`environmentStage`** -- Distinguish between
|
|
37
|
+
`production`, `preview`, and `working-copy` environments.
|
|
38
|
+
|
|
39
|
+
To break down rate-limited requests by consumer or IP, add custom log properties
|
|
40
|
+
in a policy that runs before or alongside the rate limit check:
|
|
41
|
+
|
|
42
|
+
```ts
|
|
43
|
+
import { ZuploContext, ZuploRequest } from "@zuplo/runtime";
|
|
44
|
+
|
|
45
|
+
export default async function policy(
|
|
46
|
+
request: ZuploRequest,
|
|
47
|
+
context: ZuploContext,
|
|
48
|
+
) {
|
|
49
|
+
// Tag every log entry with the consumer identity for filtering
|
|
50
|
+
context.log.setLogProperties!({
|
|
51
|
+
rateLimitIdentity:
|
|
52
|
+
request.user?.sub ?? request.headers.get("true-client-ip") ?? "unknown",
|
|
53
|
+
});
|
|
54
|
+
return request;
|
|
55
|
+
}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
This adds a `rateLimitIdentity` field to all log entries for the request, making
|
|
59
|
+
it straightforward to group 429 responses by consumer in your logging dashboard.
|
|
60
|
+
|
|
61
|
+
### Setting up alerts
|
|
62
|
+
|
|
63
|
+
Configure alerts in your logging provider for the following conditions:
|
|
64
|
+
|
|
65
|
+
- **Spike in 429 responses** -- A sudden increase may indicate a
|
|
66
|
+
misconfiguration, an attack, or a legitimate traffic surge.
|
|
67
|
+
- **429 rate exceeding a threshold** -- If more than a small percentage of
|
|
68
|
+
requests return 429, the rate limit may be set too low for normal traffic.
|
|
69
|
+
- **Zero 429 responses over an extended period** -- If you expect rate limiting
|
|
70
|
+
to be active but see no rejections, the policy may not be attached to the
|
|
71
|
+
correct routes.
|
|
72
|
+
|
|
73
|
+
### Metrics plugins
|
|
74
|
+
|
|
75
|
+
For quantitative monitoring, Zuplo supports
|
|
76
|
+
[metrics plugins](../articles/metrics-plugins.mdx) that send request latency,
|
|
77
|
+
request size, and response size data to Datadog, Dynatrace, New Relic, or any
|
|
78
|
+
OpenTelemetry-compatible collector. While these metrics do not track rate limit
|
|
79
|
+
counters directly, the `statusCode` dimension (when enabled) allows you to chart
|
|
80
|
+
429 response rates alongside overall request volume.
|
|
81
|
+
|
|
82
|
+
## Understanding failure modes
|
|
83
|
+
|
|
84
|
+
The rate limiting policies depend on a globally distributed rate limit service
|
|
85
|
+
to track request counters. Understanding what happens when that service is
|
|
86
|
+
unreachable helps you make the right availability tradeoff.
|
|
87
|
+
|
|
88
|
+
### Fail-open (default)
|
|
89
|
+
|
|
90
|
+
By default, `throwOnFailure` is set to `false`. If the rate limit service is
|
|
91
|
+
unreachable, the policy allows the request through. This fail-open behavior
|
|
92
|
+
prevents a rate limit service outage from blocking all traffic to your API.
|
|
93
|
+
|
|
94
|
+
The tradeoff is that during an outage, rate limits are not enforced and clients
|
|
95
|
+
can exceed their configured thresholds.
|
|
96
|
+
|
|
97
|
+
### Fail-closed
|
|
98
|
+
|
|
99
|
+
Set `throwOnFailure` to `true` to return an error when the rate limit service is
|
|
100
|
+
unreachable. This guarantees that no request bypasses rate limiting, but it
|
|
101
|
+
means a service disruption blocks all traffic on routes using that policy.
|
|
102
|
+
|
|
103
|
+
```json
|
|
104
|
+
{
|
|
105
|
+
"options": {
|
|
106
|
+
"rateLimitBy": "user",
|
|
107
|
+
"requestsAllowed": 100,
|
|
108
|
+
"timeWindowMinutes": 1,
|
|
109
|
+
"throwOnFailure": true
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
:::warning
|
|
115
|
+
|
|
116
|
+
Only use `throwOnFailure: true` when allowing unlimited traffic is more
|
|
117
|
+
dangerous than rejecting all traffic. For most APIs, the fail-open default is
|
|
118
|
+
the safer choice.
|
|
119
|
+
|
|
120
|
+
:::
|
|
121
|
+
|
|
122
|
+
### Detecting fail-open conditions
|
|
123
|
+
|
|
124
|
+
Because fail-open requests succeed with a `200` (or other normal status code),
|
|
125
|
+
they do not produce a 429 log entry. To detect when the rate limit service is
|
|
126
|
+
unreachable, monitor for a sudden drop in 429 responses during periods when you
|
|
127
|
+
expect rate limiting to be active. A complete absence of 429s alongside steady
|
|
128
|
+
or increasing traffic volume is a strong signal that the service is in fail-open
|
|
129
|
+
mode.
|
|
130
|
+
|
|
131
|
+
## Strict vs. async mode in production
|
|
132
|
+
|
|
133
|
+
The `mode` option controls whether the rate limit check blocks the request or
|
|
134
|
+
runs in parallel with it.
|
|
135
|
+
|
|
136
|
+
### Strict mode (default)
|
|
137
|
+
|
|
138
|
+
In `strict` mode, every request waits for the rate limit service to confirm
|
|
139
|
+
whether the request is within limits before proceeding to the backend. This
|
|
140
|
+
provides exact enforcement -- no request exceeds the configured threshold.
|
|
141
|
+
|
|
142
|
+
The tradeoff is added latency on every request due to the round-trip to the rate
|
|
143
|
+
limit service.
|
|
144
|
+
|
|
145
|
+
### Async mode
|
|
146
|
+
|
|
147
|
+
In `async` mode, the request proceeds to the backend immediately while the rate
|
|
148
|
+
limit check runs in parallel. If the check determines the limit is exceeded, the
|
|
149
|
+
result applies to the _next_ request, not the current one.
|
|
150
|
+
|
|
151
|
+
This means some requests may get through after the limit is reached. In
|
|
152
|
+
practice, the overshoot depends on your request rate and the latency of the rate
|
|
153
|
+
limit check. For an API receiving 100 requests per second with a 10ms check
|
|
154
|
+
time, approximately one extra request may slip through per window.
|
|
155
|
+
|
|
156
|
+
:::tip
|
|
157
|
+
|
|
158
|
+
Use `async` mode when low latency matters more than exact enforcement -- for
|
|
159
|
+
example, on high-throughput public endpoints where a few extra requests over the
|
|
160
|
+
limit are acceptable. Use `strict` mode when precise enforcement is required,
|
|
161
|
+
such as billing-sensitive endpoints or APIs with hard backend capacity limits.
|
|
162
|
+
|
|
163
|
+
:::
|
|
164
|
+
|
|
165
|
+
## Common troubleshooting scenarios
|
|
166
|
+
|
|
167
|
+
### Unexpected 429 responses
|
|
168
|
+
|
|
169
|
+
**Shared IP addresses.** When `rateLimitBy` is set to `"ip"`, multiple clients
|
|
170
|
+
behind the same corporate proxy, cloud NAT, or shared Wi-Fi share a single rate
|
|
171
|
+
limit bucket. One heavy user exhausts the limit for everyone on that IP. Switch
|
|
172
|
+
to `rateLimitBy: "user"` for authenticated APIs to avoid this.
|
|
173
|
+
|
|
174
|
+
**Missing authentication policy.** The `"user"` mode requires an authentication
|
|
175
|
+
policy (such as API Key Authentication or JWT) earlier in the policy pipeline to
|
|
176
|
+
populate `request.user`. If no authentication policy runs first, the rate limit
|
|
177
|
+
policy returns an error instead of applying per-user limits. Verify that
|
|
178
|
+
authentication appears before rate limiting in the route's inbound policy list.
|
|
179
|
+
|
|
180
|
+
**Multiple rate limit policies on the same route.** If a route has both a
|
|
181
|
+
per-minute and a per-hour rate limit policy, a request can be rejected by either
|
|
182
|
+
one. Check all rate limit policies attached to the route, and verify the
|
|
183
|
+
ordering (longest time window first, then shorter durations).
|
|
184
|
+
|
|
185
|
+
**Lower limits than expected.** If you use a custom `rateLimitBy: "function"`,
|
|
186
|
+
verify that the function returns the expected `requestsAllowed` and
|
|
187
|
+
`timeWindowMinutes` values. Log the returned values during development to
|
|
188
|
+
confirm the function resolves correctly for each consumer.
|
|
189
|
+
|
|
190
|
+
### Rate limits not applying
|
|
191
|
+
|
|
192
|
+
**Policy not attached to the route.** Defining a rate limit policy in
|
|
193
|
+
`policies.json` does not activate it. The policy name must appear in the
|
|
194
|
+
`policies.inbound` array of each route in `routes.oas.json` where you want it
|
|
195
|
+
enforced. Verify the route configuration.
|
|
196
|
+
|
|
197
|
+
**Typo in the policy name.** The policy name in `routes.oas.json` must exactly
|
|
198
|
+
match the `name` field in `policies.json`. A mismatched name silently skips the
|
|
199
|
+
policy. Check for case sensitivity and extra whitespace.
|
|
200
|
+
|
|
201
|
+
**Custom function returning `undefined`.** When `rateLimitBy` is set to
|
|
202
|
+
`"function"` and the identifier function returns `undefined`, rate limiting is
|
|
203
|
+
skipped for that request entirely. This is by design -- it allows you to
|
|
204
|
+
selectively exempt certain requests -- but it can cause confusion if the
|
|
205
|
+
function has an unhandled code path that returns `undefined` unintentionally.
|
|
206
|
+
|
|
207
|
+
### Different behavior across environments
|
|
208
|
+
|
|
209
|
+
Rate limit counters are scoped per environment. Production, preview, and
|
|
210
|
+
working-copy environments each maintain their own separate counters. A request
|
|
211
|
+
that is rate-limited in production does not affect the counter in a preview
|
|
212
|
+
environment, and vice versa.
|
|
213
|
+
|
|
214
|
+
This means:
|
|
215
|
+
|
|
216
|
+
- Testing rate limits in a preview branch does not interfere with production
|
|
217
|
+
traffic.
|
|
218
|
+
- Rate limit thresholds you observe in a low-traffic preview environment may
|
|
219
|
+
behave differently under production load.
|
|
220
|
+
- After deploying a new environment, counters start fresh.
|
|
221
|
+
|
|
222
|
+
:::note
|
|
223
|
+
|
|
224
|
+
If you observe rate limits triggering in one environment but not another,
|
|
225
|
+
confirm that both environments use the same policy configuration and that the
|
|
226
|
+
traffic volume is comparable.
|
|
227
|
+
|
|
228
|
+
:::
|
|
229
|
+
|
|
230
|
+
## Related resources
|
|
231
|
+
|
|
232
|
+
- [Rate Limit Exceeded error](../errors/rate-limit-exceeded.mdx) --
|
|
233
|
+
Understanding the 429 response format and client-side remediation
|
|
234
|
+
- [How rate limiting works](./how-it-works.md) -- Algorithm details,
|
|
235
|
+
`rateLimitBy` modes, and configuration options
|
|
236
|
+
- [Logging](../articles/logging.mdx) -- Configuring log shipping to external
|
|
237
|
+
providers
|
|
238
|
+
- [Metrics Plugins](../articles/metrics-plugins.mdx) -- Sending request metrics
|
|
239
|
+
to Datadog, Dynatrace, New Relic, or OpenTelemetry
|
|
240
|
+
- [Proactive monitoring](../articles/monitoring-your-gateway.mdx) -- Health
|
|
241
|
+
checks and end-to-end gateway monitoring
|
|
242
|
+
- [Troubleshooting](../articles/troubleshooting.md) -- General gateway
|
|
243
|
+
troubleshooting guide
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
|
-
title: Per
|
|
3
|
-
sidebar_label: "Per-
|
|
2
|
+
title: Per-user rate limiting using a database and the ZoneCache
|
|
3
|
+
sidebar_label: "Per-user rate limits"
|
|
4
4
|
description:
|
|
5
5
|
Learn how to implement advanced dynamic rate limiting with database lookups
|
|
6
6
|
and ZoneCache for improved performance.
|
|
@@ -9,23 +9,22 @@ tags:
|
|
|
9
9
|
- caching
|
|
10
10
|
---
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
[dynamic rate limiting](./
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
This example shows a more advanced implementation of
|
|
13
|
+
[dynamic rate limiting](./dynamic-rate-limiting.mdx). It uses a database lookup
|
|
14
|
+
to get the customer details and combines that with the ZoneCache to improve
|
|
15
|
+
performance, reduce latency, and lower the load on the database.
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
could use your own API, [Xata](https://xata.io),
|
|
19
|
-
[Firebase](https://firebase.com)
|
|
20
|
-
all.
|
|
17
|
+
This example uses [Supabase](https://supabase.com) as the database, but you
|
|
18
|
+
could use your own API, [Xata](https://xata.io), or
|
|
19
|
+
[Firebase](https://firebase.com). The implementation is similar for all.
|
|
21
20
|
|
|
22
21
|
If you haven't already, check out the
|
|
23
22
|
[rate-limiting policy](../policies/rate-limit-inbound.mdx) and the
|
|
24
|
-
[dynamic rate limiting
|
|
25
|
-
|
|
23
|
+
[dynamic rate limiting guide](./dynamic-rate-limiting.mdx). Then you should be
|
|
24
|
+
familiar with how dynamic rate limiting works.
|
|
26
25
|
|
|
27
|
-
Below is a full implementation of a custom rate limiting function. In
|
|
28
|
-
example
|
|
26
|
+
Below is a full implementation of a custom rate limiting function. In this
|
|
27
|
+
example it is a module called `per-user-rate-limiting.ts`.
|
|
29
28
|
|
|
30
29
|
```ts
|
|
31
30
|
import {
|
|
@@ -47,9 +46,18 @@ export async function rateLimitKey(
|
|
|
47
46
|
context: ZuploContext,
|
|
48
47
|
policyName: string,
|
|
49
48
|
): Promise<CustomRateLimitDetails> {
|
|
50
|
-
//
|
|
51
|
-
// This might be from a JWT or API Key metadata
|
|
52
|
-
|
|
49
|
+
// Get the customer ID from the user data.
|
|
50
|
+
// This might be from a JWT or API Key metadata.
|
|
51
|
+
// Ensure an authentication policy runs before this.
|
|
52
|
+
const customerId = request.user?.data?.customerId;
|
|
53
|
+
if (!customerId) {
|
|
54
|
+
context.log.error("No customerId found on request.user.data");
|
|
55
|
+
return {
|
|
56
|
+
key: request.user?.sub ?? "unknown",
|
|
57
|
+
requestsAllowed: FALLBACK_REQUESTS_ALLOWED,
|
|
58
|
+
timeWindowMinutes: 1,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
53
61
|
|
|
54
62
|
// We don't want to hit the database on every request
|
|
55
63
|
// So we'll use the fast zone cache to cache this data
|
|
@@ -94,17 +102,21 @@ export async function rateLimitKey(
|
|
|
94
102
|
The above function can be applied to a rate limiter with the following
|
|
95
103
|
configuration in `policies.json`:
|
|
96
104
|
|
|
97
|
-
```json
|
|
105
|
+
```json title="config/policies.json"
|
|
98
106
|
{
|
|
99
|
-
"
|
|
100
|
-
"
|
|
101
|
-
"
|
|
102
|
-
"
|
|
103
|
-
"
|
|
104
|
-
"
|
|
105
|
-
|
|
106
|
-
"
|
|
107
|
-
"
|
|
107
|
+
"name": "my-per-user-rate-limit-policy",
|
|
108
|
+
"policyType": "rate-limit-inbound",
|
|
109
|
+
"handler": {
|
|
110
|
+
"export": "RateLimitInboundPolicy",
|
|
111
|
+
"module": "$import(@zuplo/runtime)",
|
|
112
|
+
"options": {
|
|
113
|
+
"rateLimitBy": "function",
|
|
114
|
+
"requestsAllowed": 100,
|
|
115
|
+
"timeWindowMinutes": 1,
|
|
116
|
+
"identifier": {
|
|
117
|
+
"export": "rateLimitKey",
|
|
118
|
+
"module": "$import(./modules/per-user-rate-limiting)"
|
|
119
|
+
}
|
|
108
120
|
}
|
|
109
121
|
}
|
|
110
122
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "zuplo",
|
|
3
|
-
"version": "6.70.
|
|
3
|
+
"version": "6.70.71",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "The programmable API Gateway",
|
|
6
6
|
"author": "Zuplo, Inc.",
|
|
@@ -19,9 +19,9 @@
|
|
|
19
19
|
"zuplo": "zuplo.js"
|
|
20
20
|
},
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@zuplo/cli": "6.70.
|
|
23
|
-
"@zuplo/core": "6.70.
|
|
24
|
-
"@zuplo/runtime": "6.70.
|
|
22
|
+
"@zuplo/cli": "6.70.71",
|
|
23
|
+
"@zuplo/core": "6.70.71",
|
|
24
|
+
"@zuplo/runtime": "6.70.71",
|
|
25
25
|
"@zuplo/test": "1.4.0"
|
|
26
26
|
}
|
|
27
27
|
}
|