mocklimit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mocklimit/__init__.py +3 -0
- mocklimit/__main__.py +5 -0
- mocklimit/main.py +52 -0
- mocklimit/openapi/__init__.py +12 -0
- mocklimit/openapi/models.py +18 -0
- mocklimit/openapi/parser.py +88 -0
- mocklimit/openapi/response_generator.py +196 -0
- mocklimit/py.typed +0 -0
- mocklimit/ratelimit/__init__.py +14 -0
- mocklimit/ratelimit/composite.py +71 -0
- mocklimit/ratelimit/fixed_window.py +95 -0
- mocklimit/ratelimit/models.py +27 -0
- mocklimit/ratelimit/quantized.py +98 -0
- mocklimit/server/__init__.py +6 -0
- mocklimit/server/app.py +267 -0
- mocklimit/server/config.py +65 -0
- mocklimit/server/stats.py +68 -0
- mocklimit-0.1.0.dist-info/METADATA +156 -0
- mocklimit-0.1.0.dist-info/RECORD +22 -0
- mocklimit-0.1.0.dist-info/WHEEL +4 -0
- mocklimit-0.1.0.dist-info/entry_points.txt +3 -0
- mocklimit-0.1.0.dist-info/licenses/LICENSE +21 -0
mocklimit/__init__.py
ADDED
mocklimit/__main__.py
ADDED
mocklimit/main.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""CLI entry point for the mocklimit server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
import uvicorn
|
|
9
|
+
|
|
10
|
+
from .server.app import create_app
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the ``mocklimit`` command-line parser.

    The parser exposes a single ``serve`` sub-command; the chosen
    sub-command name is stored on the namespace as ``command`` (``None``
    when no sub-command was given).
    """
    root = argparse.ArgumentParser(
        description="Configurable mock API server with realistic rate limiting",
        prog="mocklimit",
    )
    commands = root.add_subparsers(dest="command")
    serve_cmd = commands.add_parser("serve", help="Start the mock server")

    # (flag, add_argument keyword arguments), registered in display order.
    serve_options = (
        ("--spec", {"required": True, "help": "Path to the OpenAPI spec YAML"}),
        (
            "--rate-config",
            {"required": True, "help": "Path to the rate-limit config YAML"},
        ),
        (
            "--port",
            {"type": int, "default": 8000, "help": "Port to listen on (default: 8000)"},
        ),
        (
            "--host",
            {"default": "127.0.0.1", "help": "Host to bind to (default: 127.0.0.1)"},
        ),
    )
    for flag, options in serve_options:
        serve_cmd.add_argument(flag, **options)

    return root
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def main(argv: list[str] | None = None) -> None:
    """Run the CLI: parse *argv* and start the server for ``serve``.

    When *argv* is ``None`` argparse reads ``sys.argv``.  Any invocation
    other than ``serve`` prints the help text and exits with status 1.
    """
    cli = _build_parser()
    options = cli.parse_args(argv)

    if options.command == "serve":
        application = create_app(options.spec, options.rate_config)
        uvicorn.run(application, host=options.host, port=options.port)
        return

    # No (or an unknown) sub-command: show usage and signal failure.
    cli.print_help()
    sys.exit(1)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""OpenAPI spec parsing."""
|
|
2
|
+
|
|
3
|
+
from .models import RouteDefinition
|
|
4
|
+
from .parser import parse_spec
|
|
5
|
+
from .response_generator import generate_all_responses, generate_dummy_response
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"RouteDefinition",
|
|
9
|
+
"generate_all_responses",
|
|
10
|
+
"generate_dummy_response",
|
|
11
|
+
"parse_spec",
|
|
12
|
+
]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""OpenAPI route definition models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
__all__ = ["RouteDefinition"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True, slots=True)
class RouteDefinition:
    """A single API route extracted from an OpenAPI spec.

    Instances are immutable (``frozen=True``) and slot-based.
    """

    # Path template exactly as keyed under the spec's ``paths`` mapping.
    path: str
    # HTTP method name; ``parse_spec`` stores it upper-cased (e.g. ``"GET"``).
    method: str
    # Schema of the first 2xx ``application/json`` response; ``{}`` if none.
    response_schema: dict[str, Any] = field(default_factory=dict)
    # The operation's ``operationId`` when the spec provides one.
    operation_id: str | None = None
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""OpenAPI spec parser for extracting route definitions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
import jsonref
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from .models import RouteDefinition
|
|
12
|
+
|
|
13
|
+
__all__ = ["parse_spec"]
|
|
14
|
+
|
|
15
|
+
# Lower-case path-item keys that denote HTTP operations.  Any other key found
# in a path item is skipped when routes are collected.
_HTTP_METHODS = frozenset({
    "get", "put", "post", "delete",
    "options", "head", "patch", "trace",
})
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _as_str_dict(value: object) -> dict[str, Any] | None:
    """Return *value* typed as a string-keyed dict, or ``None`` for non-dicts.

    Only the container type is checked at runtime; the ``str`` key type is a
    static-typing assertion and is not verified.
    """
    return cast("dict[str, Any]", value) if isinstance(value, dict) else None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _extract_response_schema(operation: dict[str, Any]) -> dict[str, Any]:
    """Return the JSON schema for the first 2xx response, or empty dict.

    Status codes are visited in ascending order of their string form.  The
    first 2xx entry whose ``content -> application/json -> schema`` chain
    consists of mappings all the way down wins; entries with a missing or
    malformed link are skipped.

    Args:
        operation: An OpenAPI operation object.

    Returns:
        The schema mapping, or ``{}`` when no 2xx JSON schema exists.
    """
    responses = operation.get("responses")
    if not isinstance(responses, dict) or not responses:
        return {}

    # YAML loads an unquoted ``200:`` key as an int, and specs often mix such
    # keys with the string ``default``.  Sorting and matching on ``str(code)``
    # avoids both the TypeError from comparing int with str and the
    # AttributeError from calling ``startswith`` on an int.
    for status_code in sorted(responses, key=str):
        if not str(status_code).startswith("2"):
            continue
        node: object = responses[status_code]
        for step in ("content", "application/json", "schema"):
            if not isinstance(node, dict):
                break
            node = node.get(step)
        # After a ``break`` *node* is a non-dict, so this also rejects
        # responses whose chain was cut short.
        if isinstance(node, dict):
            return cast("dict[str, Any]", node)

    return {}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def parse_spec(path: str) -> list[RouteDefinition]:
    """Parse an OpenAPI YAML/JSON file and return route definitions.

    Iterates over all paths and HTTP methods, extracting the response schema
    from the first 2xx response with ``application/json`` content.  Missing
    responses or schemas are represented as empty dicts.  ``$ref`` values are
    resolved with ``jsonref`` before the paths are walked.

    Args:
        path: Filesystem path of the spec, read as UTF-8.

    Returns:
        One ``RouteDefinition`` per (path, method) pair, in document order.
        An empty list is returned when the document is empty, is not a
        mapping, or has no usable ``paths`` mapping.
    """
    raw = Path(path).read_text(encoding="utf-8")
    loaded = yaml.safe_load(raw)
    # An empty file loads as ``None`` and a scalar/list document is not a
    # spec; neither has routes, so do not fall through to ``.get``.
    if not isinstance(loaded, dict):
        return []
    spec: dict[str, Any] = jsonref.replace_refs(loaded)

    # Validate the shape the same way ``generate_all_responses`` does, so a
    # malformed ``paths`` value yields no routes instead of an AttributeError.
    paths = _as_str_dict(spec.get("paths"))
    if not paths:
        return []

    routes: list[RouteDefinition] = []
    for route_path, path_item_raw in paths.items():
        path_item = _as_str_dict(path_item_raw)
        if path_item is None:
            continue
        for method, operation_raw in path_item.items():
            # Path items also carry non-operation keys (parameters, summary…).
            if method not in _HTTP_METHODS:
                continue
            operation = _as_str_dict(operation_raw)
            if operation is None:
                continue
            op_id: str | None = operation.get("operationId")
            routes.append(
                RouteDefinition(
                    path=route_path,
                    method=method.upper(),
                    response_schema=_extract_response_schema(operation),
                    operation_id=op_id,
                ),
            )

    return routes
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Generate minimal valid JSON responses from OpenAPI JSON schemas."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
__all__ = ["generate_all_responses", "generate_dummy_response"]
|
|
11
|
+
|
|
12
|
+
# Lower-case path-item keys treated as HTTP operations when generating
# responses; every other key in a path item is ignored.
_HTTP_METHODS = frozenset({
    "get", "put", "post", "delete",
    "options", "head", "patch", "trace",
})
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _as_str_dict(value: object) -> dict[str, Any] | None:
    """Narrow *value* to ``dict[str, Any]`` when it is a dict, else ``None``.

    The key type is asserted for the type checker only; at runtime just the
    ``dict`` container type is tested.
    """
    if not isinstance(value, dict):
        return None
    return cast("dict[str, Any]", value)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Fixed placeholder emitted for each primitive JSON Schema ``type``.
# ``array`` and ``object`` are handled separately by the generator functions.
_PRIMITIVE_DEFAULTS: dict[str, str | int | float | bool] = {
    "string": "mock_string",
    "integer": 1,
    "number": 1.0,
    "boolean": True,
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _resolve_ref(ref: str, all_schemas: dict[str, Any]) -> dict[str, Any]:
    """Look up the schema a ``$ref`` such as ``#/components/schemas/Foo`` names.

    Only the final path segment is used as the lookup key in *all_schemas*.
    An unknown name, or an entry that is not a dict, resolves to ``{}``.
    """
    _, _, schema_name = ref.rpartition("/")
    candidate = all_schemas.get(schema_name)
    return cast("dict[str, Any]", candidate) if isinstance(candidate, dict) else {}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _generate_array(
    schema: dict[str, Any],
    all_schemas: dict[str, Any] | None,
) -> list[Any]:
    """Produce a one-element list for an ``array`` schema.

    The element is generated from the schema's ``items`` entry; when
    ``items`` is absent or not a dict the single element is ``None``.
    """
    element_schema = schema.get("items")
    if not isinstance(element_schema, dict):
        return [None]
    element = _generate_value(cast("dict[str, Any]", element_schema), all_schemas)
    return [element]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _generate_for_type(
    schema_type: str,
    schema: dict[str, Any],
    all_schemas: dict[str, Any] | None,
) -> str | int | float | bool | list[Any] | dict[str, Any] | None:
    """Generate a value for *schema* according to its JSON Schema ``type``.

    Containers are delegated to the array/object generators, primitives map
    to their fixed placeholder, and any unrecognised type yields ``None``.
    """
    if schema_type == "object":
        return _generate_object(schema, all_schemas)
    if schema_type == "array":
        return _generate_array(schema, all_schemas)
    # The primitive table has no container entries, so a plain lookup with a
    # ``None`` fallback covers both known primitives and unknown types.
    return _PRIMITIVE_DEFAULTS.get(schema_type)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _generate_value(
    schema: dict[str, Any],
    all_schemas: dict[str, Any] | None,
) -> str | int | float | bool | list[Any] | dict[str, Any] | None:
    """Recursively generate a dummy value matching *schema*.

    Precedence is ``$ref`` first, then ``enum`` (first listed value), then
    ``type``.

    Args:
        schema: The JSON schema fragment to satisfy.
        all_schemas: Named schemas used to resolve ``$ref``; when ``None`` a
            ``$ref`` cannot be resolved and produces ``None``.

    Returns:
        The generated value, or ``None`` when the schema has no usable type,
        an empty ``enum`` or an unresolvable ``$ref``.
    """
    if "$ref" in schema:
        if all_schemas is None:
            return None
        return _generate_value(_resolve_ref(schema["$ref"], all_schemas), all_schemas)

    if "enum" in schema:
        values: list[Any] = schema["enum"]
        return cast("str | int | float | bool", values[0]) if values else None

    schema_type = schema.get("type")
    if isinstance(schema_type, list):
        # OpenAPI 3.1 / JSON Schema permit a list of types such as
        # ``["string", "null"]``.  Use the first non-null entry instead of
        # handing an unhashable list to the type dispatcher.
        schema_type = next((t for t in schema_type if t != "null"), None)
    if not isinstance(schema_type, str):
        return None
    return _generate_for_type(schema_type, schema, all_schemas)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _generate_object(
    schema: dict[str, Any],
    all_schemas: dict[str, Any] | None,
) -> dict[str, Any]:
    """Create a dummy object from *schema*'s ``properties``.

    When ``required`` is present and non-empty only the properties it lists
    are emitted; otherwise every declared property is.  A property whose
    schema is not a dict is emitted as ``None``.
    """
    declared: dict[str, Any] = schema.get("properties", {})
    wanted: list[str] | None = schema.get("required")

    generated: dict[str, Any] = {}
    for name in declared:
        if wanted and name not in wanted:
            continue
        sub_schema = declared[name]
        generated[name] = (
            _generate_value(cast("dict[str, Any]", sub_schema), all_schemas)
            if isinstance(sub_schema, dict)
            else None
        )
    return generated
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def generate_dummy_response(
    schema: dict[str, Any],
    all_schemas: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build a minimal JSON object that conforms to *schema*.

    Value rules:
    - ``string``  -> ``"mock_string"``
    - ``integer`` -> ``1``
    - ``number``  -> ``1.0``
    - ``boolean`` -> ``True``
    - ``array``   -> single-element list with a dummy item
    - ``enum``    -> first value
    - ``object``  -> recurse into properties
    - ``$ref``    -> resolved via *all_schemas*
    - no type     -> ``None``

    If an object schema lists ``required`` fields, only those are generated.
    A schema whose generated value is not a dict produces ``{}``.
    """
    generated = _generate_value(schema, all_schemas)
    if isinstance(generated, dict):
        return generated
    return {}
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _extract_raw_response_schema(operation: dict[str, Any]) -> dict[str, Any]:
    """Return the raw response schema for the first 2xx response.

    Status codes are visited in ascending order of their string form.  The
    first 2xx entry whose ``content -> application/json -> schema`` chain is
    made of mappings throughout is returned as-is (``$ref`` left unresolved);
    entries with a missing or malformed link are skipped.

    Args:
        operation: An OpenAPI operation object.

    Returns:
        The schema mapping, or ``{}`` when no 2xx JSON schema exists.
    """
    responses = operation.get("responses")
    if not isinstance(responses, dict) or not responses:
        return {}

    # ``yaml.safe_load`` turns an unquoted ``200:`` key into an int, often
    # next to the string key ``default``.  Compare on ``str(code)`` so sorting
    # never mixes int with str and ``startswith`` is always available.
    for status_code in sorted(responses, key=str):
        if not str(status_code).startswith("2"):
            continue
        node: object = responses[status_code]
        for step in ("content", "application/json", "schema"):
            if not isinstance(node, dict):
                break
            node = node.get(step)
        # A broken chain leaves a non-dict in *node*, which is rejected here.
        if isinstance(node, dict):
            return cast("dict[str, Any]", node)

    return {}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def generate_all_responses(spec_path: str) -> dict[str, dict[str, Any]]:
    """Parse an OpenAPI spec and return dummy responses for every route.

    Returns a mapping of ``"METHOD /path"`` to a generated response dict.
    ``$ref`` values are resolved against the spec's ``components/schemas``.
    Operations without a 2xx ``application/json`` schema are omitted.

    Args:
        spec_path: Filesystem path of the spec, read as UTF-8.

    Returns:
        The route-key to response mapping; empty when the document is empty,
        is not a mapping, or has no usable ``paths`` mapping.
    """
    raw = Path(spec_path).read_text(encoding="utf-8")
    # ``safe_load`` gives ``None`` for an empty file and a scalar or list for
    # non-spec documents; none of them can contain routes.
    spec = _as_str_dict(yaml.safe_load(raw))
    if spec is None:
        return {}

    components = _as_str_dict(spec.get("components"))
    # A missing or non-mapping ``schemas`` entry (e.g. a bare ``schemas:``
    # line, which loads as ``None``) is treated as "no named schemas".
    named_schemas = (
        _as_str_dict(components.get("schemas")) if components is not None else None
    )
    all_schemas: dict[str, Any] = named_schemas if named_schemas is not None else {}

    paths: dict[str, Any] | None = _as_str_dict(spec.get("paths"))
    if not paths:
        return {}

    result: dict[str, dict[str, Any]] = {}
    for route_path, path_item_raw in paths.items():
        path_item = _as_str_dict(path_item_raw)
        if path_item is None:
            continue
        for method, operation_raw in path_item.items():
            if method not in _HTTP_METHODS:
                continue
            operation = _as_str_dict(operation_raw)
            if operation is None:
                continue
            response_schema = _extract_raw_response_schema(operation)
            if response_schema:
                key = f"{method.upper()} {route_path}"
                result[key] = generate_dummy_response(response_schema, all_schemas)

    return result
|
mocklimit/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Rate limiting engine."""
|
|
2
|
+
|
|
3
|
+
from .composite import CompositeLimit
|
|
4
|
+
from .fixed_window import FixedWindowLimiter
|
|
5
|
+
from .models import CompositeLimitResult, LimitResult
|
|
6
|
+
from .quantized import QuantizedLimiter
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"CompositeLimit",
|
|
10
|
+
"CompositeLimitResult",
|
|
11
|
+
"FixedWindowLimiter",
|
|
12
|
+
"LimitResult",
|
|
13
|
+
"QuantizedLimiter",
|
|
14
|
+
]
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Composite rate limiter that enforces multiple limits atomically."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from .models import CompositeLimitResult, LimitResult
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from .fixed_window import FixedWindowLimiter
|
|
12
|
+
|
|
13
|
+
__all__ = ["CompositeLimit"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CompositeLimit:
    """Apply a group of named limiters to a key as one all-or-nothing step.

    Each key gets its own ``threading.Lock``; the peek pass and the consume
    pass for a key run while that lock is held, so another thread working on
    the same key cannot slip in between them.
    """

    __slots__ = ("_limiters", "_lock_map_lock", "_locks")

    def __init__(self, limiters: list[tuple[str, FixedWindowLimiter]]) -> None:
        """Store the ``(name, limiter)`` pairs and set up the lock registry."""
        self._limiters = limiters
        # Guards creation of per-key locks in ``_locks``.
        self._lock_map_lock = threading.Lock()
        self._locks: dict[str, threading.Lock] = {}

    def _get_lock(self, key: str) -> threading.Lock:
        """Fetch the lock dedicated to *key*, creating it on first use."""
        with self._lock_map_lock:
            try:
                return self._locks[key]
            except KeyError:
                fresh = self._locks[key] = threading.Lock()
                return fresh

    def check(self, key: str, costs: dict[str, int]) -> CompositeLimitResult:
        """Consume budget from every limiter, but only if all of them allow it.

        *costs* maps each limiter name to the cost charged against it.  The
        returned `CompositeLimitResult` carries one entry per limiter; on a
        denial ``denied_by`` names the first limiter that refused and nothing
        is consumed.
        """
        with self._get_lock(key):
            outcomes: dict[str, LimitResult] = {}
            blocker: str | None = None

            # Pass 1: dry run against every limiter.
            for name, limiter in self._limiters:
                outcome = limiter.peek(key, costs[name])
                outcomes[name] = outcome
                if blocker is None and not outcome.allowed:
                    blocker = name

            # Pass 2: everyone agreed, so actually spend the budget.
            if blocker is None:
                for name, limiter in self._limiters:
                    outcomes[name] = limiter.check(key, costs[name])

            return CompositeLimitResult(
                allowed=blocker is None,
                denied_by=blocker,
                per_limit=outcomes,
            )
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Fixed-window rate limiter implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from .models import LimitResult
|
|
8
|
+
|
|
9
|
+
__all__ = ["FixedWindowLimiter"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FixedWindowLimiter:
    """Fixed-window in-memory rate limiter.

    Divides time into consecutive windows of ``window_seconds`` length
    (window index = ``time.time() // window_seconds``) and allows up to
    ``max_requests`` units of cost per key per window.

    The class performs no locking itself; callers that need an atomic
    peek-then-check sequence must serialise access per key.
    """

    __slots__ = ("_max_requests", "_window_seconds", "_windows")

    def __init__(self, max_requests: int, window_seconds: float) -> None:
        """Create a limiter allowing *max_requests* per *window_seconds*.

        Raises:
            ValueError: If *window_seconds* is not a positive number or
                *max_requests* is negative.
        """
        # ``not (x > 0)`` also rejects NaN.  Without this check a zero window
        # only fails later, at request time, with ZeroDivisionError.
        if not window_seconds > 0:
            msg = f"window_seconds must be positive, got {window_seconds!r}"
            raise ValueError(msg)
        if max_requests < 0:
            msg = f"max_requests must be non-negative, got {max_requests!r}"
            raise ValueError(msg)
        self._max_requests = max_requests
        self._window_seconds = window_seconds
        # key -> {window index -> cost consumed within that window}
        self._windows: dict[str, dict[int, int]] = {}

    def _get_window_state(self, key: str) -> tuple[int, int, float]:
        """Return ``(current_window, current_count, reset_after)`` for *key*.

        Cleans up stale windows as a side-effect, and registers an empty
        entry for a key that has not been seen before.
        """
        now = time.time()
        current_window = int(now // self._window_seconds)
        next_window_start = (current_window + 1) * self._window_seconds
        reset_after = next_window_start - now

        key_windows = self._windows.get(key)
        if key_windows is None:
            key_windows = {}
            self._windows[key] = key_windows
        else:
            # Collect first: the dict must not change size while iterated.
            stale = [w for w in key_windows if w < current_window]
            for w in stale:
                del key_windows[w]

        current_count = key_windows.get(current_window, 0)
        return current_window, current_count, reset_after

    def _result(self, *, allowed: bool, used: int, reset_after: float) -> LimitResult:
        """Build the `LimitResult` shared by `peek` and `check`.

        *used* is the consumed cost the ``remaining`` figure is based on.
        """
        return LimitResult(
            allowed=allowed,
            remaining=self._max_requests - used,
            limit=self._max_requests,
            reset_after_seconds=reset_after,
            # A denied caller can retry once the current window rolls over.
            retry_after_seconds=0.0 if allowed else reset_after,
        )

    def peek(self, key: str, cost: int = 1) -> LimitResult:
        """Return what `check` would return without consuming budget."""
        _, current_count, reset_after = self._get_window_state(key)
        new_count = current_count + cost

        if new_count > self._max_requests:
            return self._result(allowed=False, used=current_count, reset_after=reset_after)
        return self._result(allowed=True, used=new_count, reset_after=reset_after)

    def check(self, key: str, cost: int = 1) -> LimitResult:
        """Check whether *key* may consume *cost* units of the budget.

        When allowed, the cost is recorded against the current window.
        Returns a `LimitResult` describing the decision and timing metadata.
        """
        current_window, current_count, reset_after = self._get_window_state(key)
        new_count = current_count + cost

        if new_count > self._max_requests:
            return self._result(allowed=False, used=current_count, reset_after=reset_after)

        # ``_get_window_state`` guarantees the per-key dict exists.
        self._windows[key][current_window] = new_count
        return self._result(allowed=True, used=new_count, reset_after=reset_after)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Rate limiting result models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
__all__ = ["CompositeLimitResult", "LimitResult"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True, slots=True)
class LimitResult:
    """Outcome of a rate limit check.

    Immutable, slot-based value object produced by the limiters' ``peek``
    and ``check`` methods.
    """

    # Whether the requested cost fits in the current budget.
    allowed: bool
    # Budget units left in the window (after the cost, when allowed).
    remaining: int
    # Total budget per window.
    limit: int
    # Seconds until the current window ends.
    reset_after_seconds: float
    # Seconds to wait before retrying; limiters set 0.0 when allowed.
    retry_after_seconds: float
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, slots=True)
class CompositeLimitResult:
    """Outcome of a composite rate limit check across multiple limiters.

    Immutable, slot-based value object returned by ``CompositeLimit.check``.
    """

    # True only when every limiter allowed the request.
    allowed: bool
    # Name of the first limiter that refused, or None when allowed.
    denied_by: str | None
    # Individual result of each limiter, keyed by limiter name.
    per_limit: dict[str, LimitResult]
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Quantized rate limiter that enforces an outer limit at a finer inner granularity."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
|
|
7
|
+
from .fixed_window import FixedWindowLimiter
|
|
8
|
+
from .models import LimitResult
|
|
9
|
+
|
|
10
|
+
__all__ = ["QuantizedLimiter"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class QuantizedLimiter:
    """Rate limiter that pairs a headline window with a finer inner window.

    Models behaviour like OpenAI's "600 RPM enforced as 10 RPS": the *outer*
    window carries the advertised limit, while the *inner* window stops
    bursts inside a fraction of that window which the outer limit alone
    would let through.

    ``check`` holds a per-key ``threading.Lock`` so its peek-then-consume
    sequence cannot be interleaved by another thread using the same key.
    """

    __slots__ = ("_inner", "_lock_map_lock", "_locks", "_outer")

    def __init__(
        self,
        outer_max_requests: int,
        outer_window_seconds: float,
        inner_max_requests: int,
        inner_window_seconds: float,
    ) -> None:
        """Build the *outer* and *inner* fixed-window limiters."""
        self._outer = FixedWindowLimiter(outer_max_requests, outer_window_seconds)
        self._inner = FixedWindowLimiter(inner_max_requests, inner_window_seconds)
        # Guards creation of per-key locks in ``_locks``.
        self._lock_map_lock = threading.Lock()
        self._locks: dict[str, threading.Lock] = {}

    def _get_lock(self, key: str) -> threading.Lock:
        """Fetch the lock dedicated to *key*, creating it on first use."""
        with self._lock_map_lock:
            try:
                return self._locks[key]
            except KeyError:
                fresh = self._locks[key] = threading.Lock()
                return fresh

    @staticmethod
    def _merge_allowed(outer: LimitResult, inner: LimitResult) -> LimitResult:
        """Fold two allowed results into one, keeping the tighter figures.

        The merged result reports the outer limit, the smaller remaining
        budget and the sooner reset.
        """
        tightest_remaining = min(outer.remaining, inner.remaining)
        soonest_reset = min(outer.reset_after_seconds, inner.reset_after_seconds)
        return LimitResult(
            allowed=True,
            remaining=tightest_remaining,
            limit=outer.limit,
            reset_after_seconds=soonest_reset,
            retry_after_seconds=0.0,
        )

    @staticmethod
    def _pick_denial(outer: LimitResult, inner: LimitResult) -> LimitResult:
        """Choose which result to report when at least one limiter denied.

        A lone denial is returned as-is; when both denied, the one with the
        shorter retry delay wins, with ties going to *outer*.
        """
        if outer.allowed:
            return inner
        if inner.allowed:
            return outer
        if inner.retry_after_seconds < outer.retry_after_seconds:
            return inner
        return outer

    def peek(self, key: str, cost: int = 1) -> LimitResult:
        """Preview the result of `check` without consuming budget."""
        outer = self._outer.peek(key, cost)
        inner = self._inner.peek(key, cost)

        if not (outer.allowed and inner.allowed):
            return self._pick_denial(outer, inner)
        return self._merge_allowed(outer, inner)

    def check(self, key: str, cost: int = 1) -> LimitResult:
        """Consume *cost* from both windows, but only if both would allow it."""
        with self._get_lock(key):
            outer_preview = self._outer.peek(key, cost)
            inner_preview = self._inner.peek(key, cost)

            if outer_preview.allowed and inner_preview.allowed:
                outer_spent = self._outer.check(key, cost)
                inner_spent = self._inner.check(key, cost)
                return self._merge_allowed(outer_spent, inner_spent)

            return self._pick_denial(outer_preview, inner_preview)
|
mocklimit/server/app.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""FastAPI application factory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import math
|
|
7
|
+
import random
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
from fastapi import APIRouter, FastAPI, Request
|
|
15
|
+
from fastapi.responses import JSONResponse
|
|
16
|
+
|
|
17
|
+
from mocklimit.openapi import RouteDefinition, generate_all_responses, parse_spec
|
|
18
|
+
from mocklimit.ratelimit import (
|
|
19
|
+
CompositeLimit,
|
|
20
|
+
CompositeLimitResult,
|
|
21
|
+
FixedWindowLimiter,
|
|
22
|
+
LimitResult,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from .config import EndpointConfig, PolicyConfig, RateLimitConfig, load_config
|
|
26
|
+
from .stats import StatsTracker
|
|
27
|
+
|
|
28
|
+
__all__ = ["create_app"]
|
|
29
|
+
|
|
30
|
+
# Scheme prefix (including the trailing space) that precedes the API key in
# an ``Authorization`` header.
_BEARER_PREFIX = "Bearer "
# OS-entropy-backed generator used to draw simulated completion token counts.
_RNG = random.SystemRandom()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class _RouteContext:
    """Everything a rate-limited handler needs, bundled for readability."""

    # ``"METHOD /path"`` identifier of the route.
    route_key: str
    # Generated response body for the route (``{}`` when none was generated).
    dummy_body: dict[str, Any]
    # Endpoint entry from the rate-limit config that matched this route.
    ep_cfg: EndpointConfig
    # Policy referenced by ``ep_cfg.policy``.
    policy: PolicyConfig
    # Composite limiter for the policy.
    # NOTE(review): presumably the instance built by ``_build_limiters`` — confirm.
    limiter: CompositeLimit
    # Cost per limiter name, in the shape ``CompositeLimit.check`` accepts.
    # NOTE(review): how these are derived is not visible here — confirm.
    costs: dict[str, int]
    # Stats tracker for the application.
    stats: StatsTracker
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _extract_scope_key(request: Request, policy: PolicyConfig) -> str:
    """Derive the rate-limit key from the request based on policy scope.

    Args:
        request: The incoming request.
        policy: The policy whose ``scope`` selects the keying strategy.

    Returns:
        For the ``"api_key"`` scope, the bearer token from the
        ``Authorization`` header, or ``"anonymous"`` when the header is
        missing, uses another scheme, or carries no token.  For any other
        scope, the client host, or ``"unknown"`` when the request has no
        client information.
    """
    if policy.scope == "api_key":
        auth: str | None = request.headers.get("authorization")
        prefix_len = len(_BEARER_PREFIX)
        # Authentication scheme names are case-insensitive (RFC 7235 §2.1),
        # so ``bearer``/``BEARER`` must select the same bucket as ``Bearer``.
        if auth and auth[:prefix_len].lower() == _BEARER_PREFIX.lower():
            # Strip padding so ``"Bearer  k"`` and ``"Bearer k"`` share a key;
            # an empty token is not a usable identity.
            token = auth[prefix_len:].strip()
            if token:
                return token
        return "anonymous"
    client = request.client
    if client is not None:
        return client.host
    return "unknown"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _most_restrictive(result: CompositeLimitResult) -> LimitResult:
    """Pick the one ``LimitResult`` that should drive the response headers.

    For a denial this is the result of the limiter named in ``denied_by``;
    otherwise it is the per-limiter result with the least ``remaining``.
    """
    blocker = result.denied_by
    if blocker is None:
        return min(result.per_limit.values(), key=lambda entry: entry.remaining)
    return result.per_limit[blocker]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _rate_limit_headers(
    lr: LimitResult,
    headers_cfg: PolicyConfig,
) -> dict[str, str]:
    """Render a ``LimitResult`` as rate-limit response headers.

    Header names come from ``headers_cfg.headers``.  The reset value is
    formatted with one decimal and an ``s`` suffix.  A denied result also
    gets a ``Retry-After`` header holding the retry delay rounded up to
    whole seconds.
    """
    names = headers_cfg.headers
    headers: dict[str, str] = {}
    headers[names.limit] = str(lr.limit)
    headers[names.remaining] = str(lr.remaining)
    headers[names.reset] = f"{lr.reset_after_seconds:.1f}s"

    if lr.allowed:
        return headers

    headers["Retry-After"] = str(math.ceil(lr.retry_after_seconds))
    return headers
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _build_limiters(config: RateLimitConfig) -> dict[str, CompositeLimit]:
    """Create one ``CompositeLimit`` per policy defined in *config*.

    Every entry in a policy's ``limits`` becomes a ``FixedWindowLimiter``
    named ``limit_<index>`` after its position in the list.
    """
    built: dict[str, CompositeLimit] = {}
    for policy_name, policy in config.policies.items():
        named: list[tuple[str, FixedWindowLimiter]] = []
        for index, limit_cfg in enumerate(policy.limits):
            window = FixedWindowLimiter(
                max_requests=limit_cfg.max_requests,
                window_seconds=limit_cfg.window_seconds,
            )
            named.append((f"limit_{index}", window))
        built[policy_name] = CompositeLimit(named)
    return built
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _build_route_table(
    routes: list[RouteDefinition],
    config: RateLimitConfig,
) -> list[dict[str, str | None]]:
    """Assemble the JSON-serializable listing served at ``/mocklimit/routes``.

    Each row holds the route's ``path`` and ``method`` plus the name of the
    ``policy`` that applies to it, or ``None`` when the endpoint is not
    configured or the method is not among the endpoint's methods.
    """
    rows: list[dict[str, str | None]] = []
    for route in routes:
        endpoint = config.endpoints.get(route.path)
        limited = endpoint is not None and route.method in endpoint.methods
        rows.append({
            "path": route.path,
            "method": route.method,
            "policy": endpoint.policy if limited else None,
        })
    return rows
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
async def _estimate_tokens(
    request: Request,
    ep_cfg: EndpointConfig,
) -> dict[str, int]:
    """Estimate the token usage of *request*.

    Returns ``{}`` when the endpoint has no ``token_estimation`` settings.
    Otherwise the prompt size is one token per four body bytes (rounded
    down) and the completion size is drawn at random from the inclusive
    range given by the first two entries of ``token_estimation.output``.
    """
    estimation = ep_cfg.token_estimation
    if estimation is None:
        return {}

    payload = await request.body()
    prompt = len(payload) // 4
    completion = _RNG.randint(estimation.output[0], estimation.output[1])

    usage: dict[str, int] = {}
    usage["prompt_tokens"] = prompt
    usage["completion_tokens"] = completion
    usage["total_tokens"] = prompt + completion
    return usage
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _extract_base_path(spec_path: str) -> str:
    """Return the path component of the spec's first ``servers`` URL.

    The file at *spec_path* is read as UTF-8 and parsed as YAML.  Only the
    first entry of the top-level ``servers`` list is consulted, and any
    trailing ``/`` is stripped from the resulting path, so
    ``https://api.openai.com/v1`` yields ``/v1``.

    An empty string is returned when the spec declares no servers.
    """
    document: dict[str, Any] = yaml.safe_load(
        Path(spec_path).read_text(encoding="utf-8")
    )
    server_entries: list[dict[str, Any]] = document.get("servers", [])
    if server_entries:
        first_url: str = server_entries[0].get("url", "")
        return urlparse(first_url).path.rstrip("/")
    return ""
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def create_app(spec_path: str, rate_config_path: str) -> FastAPI:
    """Assemble the mock server as a FastAPI application.

    The OpenAPI document at *spec_path* supplies the route definitions, the
    dummy response bodies and the base path used as router prefix (for
    example ``/v1``).  The YAML file at *rate_config_path* supplies the
    policies and the endpoint-to-policy mapping.

    A route is registered as rate limited when its path has an endpoint
    entry whose ``methods`` contains the route's method; every other route
    just returns its dummy body.  Two introspection endpoints are added
    directly on the app, outside the base path: ``/mocklimit/stats`` and
    ``/mocklimit/routes``.
    """
    routes = parse_spec(spec_path)
    responses = generate_all_responses(spec_path)
    config = load_config(rate_config_path)
    limiters = _build_limiters(config)
    stats = StatsTracker()
    route_table = _build_route_table(routes, config)
    base_path = _extract_base_path(spec_path)

    app = FastAPI(title="mocklimit")
    router = APIRouter(prefix=base_path)

    for route in routes:
        key = f"{route.method} {route.path}"
        canned = responses.get(key, {})
        endpoint = config.endpoints.get(route.path)

        # No endpoint entry, or the method is not covered by it: no limits.
        if endpoint is None or route.method not in endpoint.methods:
            _register_plain_route(router, route, canned)
            continue

        policy = config.policies[endpoint.policy]
        context = _RouteContext(
            route_key=key,
            dummy_body=canned,
            ep_cfg=endpoint,
            policy=policy,
            limiter=limiters[endpoint.policy],
            # Every request costs one unit against each limit of the policy.
            costs={f"limit_{idx}": 1 for idx, _ in enumerate(policy.limits)},
            stats=stats,
        )
        _register_limited_route(router, route, context)

    app.include_router(router)

    async def get_stats(_request: Request) -> JSONResponse:
        """Return per-endpoint, per-key request statistics."""
        return JSONResponse(content=stats.snapshot())

    async def get_routes(_request: Request) -> JSONResponse:
        """Return the list of registered routes and their policies."""
        return JSONResponse(content=route_table)

    for url, endpoint_fn in (
        ("/mocklimit/stats", get_stats),
        ("/mocklimit/routes", get_routes),
    ):
        app.add_api_route(url, endpoint_fn, methods=["GET"])

    return app
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _register_limited_route(
    router: APIRouter,
    route: RouteDefinition,
    ctx: _RouteContext,
) -> None:
    """Attach a handler for *route* that enforces the limiter carried by *ctx*.

    The route is registered under ``route.path`` for ``route.method`` and is
    named after the operation id, falling back to ``ctx.route_key``.
    """

    # Per request: count it, consult the limiter, then either reject with
    # 429 or answer with the dummy body (plus usage when estimation is on).
    async def handler(request: Request) -> JSONResponse:
        client_key = _extract_scope_key(request, ctx.policy)
        ctx.stats.record_request(ctx.route_key, client_key)

        outcome = ctx.limiter.check(client_key, ctx.costs)
        response_headers = _rate_limit_headers(
            _most_restrictive(outcome),
            ctx.policy,
        )

        if outcome.allowed:
            low_ms, high_ms = ctx.policy.response_latency_ms
            # An upper bound of zero disables the simulated latency.
            if high_ms > 0:
                pause_s = _RNG.uniform(low_ms / 1000, high_ms / 1000)
                await asyncio.sleep(pause_s)

            # Copy so the shared dummy body is never mutated by "usage".
            payload: dict[str, Any] = dict(ctx.dummy_body)
            estimated = await _estimate_tokens(request, ctx.ep_cfg)
            if estimated:
                payload["usage"] = estimated

            return JSONResponse(content=payload, headers=response_headers)

        # Rejected requests are counted and answered without any delay.
        ctx.stats.record_limited(ctx.route_key, client_key)
        return JSONResponse(
            status_code=429,
            content=ctx.dummy_body,
            headers=response_headers,
        )

    route_name = route.operation_id or ctx.route_key
    router.add_api_route(
        route.path,
        handler,
        methods=[route.method],
        name=route_name,
    )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _register_plain_route(
    router: APIRouter,
    route: RouteDefinition,
    dummy_body: dict[str, Any],
) -> None:
    """Attach an unlimited handler for *route* that always answers *dummy_body*.

    The body is copied once at registration time and that copy is served on
    every request.  The route is named after the operation id, falling back
    to ``"<method> <path>"``.
    """
    frozen_body = dict(dummy_body)
    route_name = route.operation_id or f"{route.method} {route.path}"

    async def handler(_request: Request) -> JSONResponse:
        return JSONResponse(content=frozen_body)

    router.add_api_route(
        route.path,
        handler,
        methods=[route.method],
        name=route_name,
    )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Rate limit configuration models and loader."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Literal
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
__all__ = ["EndpointConfig", "RateLimitConfig", "load_config"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LimitConfig(BaseModel):
    """A single rate limit window definition."""

    # Budget for one window; handed to the limiter as ``max_requests``.
    max_requests: int
    # Length of the window in seconds; handed to the limiter unchanged.
    window_seconds: float
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class HeadersConfig(BaseModel):
    """Mapping of semantic header roles to header names."""

    # Header name that reports the request limit.
    limit: str
    # Header name that reports the remaining requests.
    remaining: str
    # Header name that reports the reset time.
    reset: str
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PolicyConfig(BaseModel):
    """A named rate limiting policy."""

    # Limiting algorithm; "fixed_window" is the only accepted value.
    strategy: Literal["fixed_window"]
    # One limiter is built per entry; all of them apply to each request.
    limits: list[LimitConfig]
    # How clients are told apart: by API key or by IP address.
    scope: Literal["api_key", "ip"]
    # (min, max) simulated delay in milliseconds for allowed requests;
    # a max of 0 turns the delay off.
    response_latency_ms: tuple[int, int]
    # Names of the rate limit headers to emit.
    headers: HeadersConfig
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TokenEstimationConfig(BaseModel):
    """Token estimation strategy for an endpoint."""

    # Prompt-token strategy; "characters_div_4" is the only accepted value.
    input: Literal["characters_div_4"]
    # Inclusive (min, max) range the completion-token count is drawn from.
    output: tuple[int, int]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class EndpointConfig(BaseModel):
    """Configuration for a single API endpoint."""

    # HTTP methods to rate limit; matched against the route method as-is.
    methods: list[str]
    # Name of the policy to apply; looked up in ``RateLimitConfig.policies``.
    policy: str
    # Optional usage estimation; when unset no ``usage`` is added to responses.
    token_estimation: TokenEstimationConfig | None = None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class RateLimitConfig(BaseModel):
    """Top-level rate limiting configuration."""

    # Policies keyed by the name that endpoints refer to.
    policies: dict[str, PolicyConfig]
    # Endpoint settings keyed by route path.
    endpoints: dict[str, EndpointConfig]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def load_config(path: str) -> RateLimitConfig:
    """Load the YAML file at *path* and validate it as a ``RateLimitConfig``.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``; the parsed
    document is then passed to ``RateLimitConfig.model_validate``.
    """
    parsed: dict[str, Any] = yaml.safe_load(
        Path(path).read_text(encoding="utf-8")
    )
    return RateLimitConfig.model_validate(parsed)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Thread-safe request statistics tracker."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
__all__ = ["StatsTracker"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class _KeyStats:
    """Counters accumulated for one (endpoint, key) combination."""

    # Requests recorded via ``StatsTracker.record_request``.
    total_requests: int = 0
    # Requests recorded via ``StatsTracker.record_limited``.
    total_429s: int = 0


@dataclass
class StatsTracker:
    """Request counters grouped by endpoint and then by client key.

    ``record_request``, ``record_limited`` and ``snapshot`` all run while
    holding an internal lock, so counters are never updated or read
    half-way through another call.
    """

    # endpoint -> key -> counters
    _data: dict[str, dict[str, _KeyStats]] = field(default_factory=dict)
    _lock: threading.Lock = field(default_factory=threading.Lock)

    def record_request(self, endpoint: str, key: str) -> None:
        """Count one more request for *endpoint* / *key*."""
        with self._lock:
            counters = self._ensure(endpoint, key)
            counters.total_requests += 1

    def record_limited(self, endpoint: str, key: str) -> None:
        """Count one more 429 response for *endpoint* / *key*."""
        with self._lock:
            counters = self._ensure(endpoint, key)
            counters.total_429s += 1

    def snapshot(self) -> dict[str, Any]:
        """Return the current counters as freshly built, JSON-friendly dicts."""
        with self._lock:
            report: dict[str, Any] = {}
            for endpoint, per_key in self._data.items():
                report[endpoint] = {
                    key: {
                        "total_requests": counters.total_requests,
                        "total_429s": counters.total_429s,
                    }
                    for key, counters in per_key.items()
                }
            return report

    def _ensure(self, endpoint: str, key: str) -> _KeyStats:
        """Fetch the counters for *endpoint* / *key*, creating them on first use.

        The caller must already hold ``self._lock``.
        """
        per_key = self._data.setdefault(endpoint, {})
        if key not in per_key:
            per_key[key] = _KeyStats()
        return per_key[key]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mocklimit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Configurable mock API server with realistic rate limiting for testing
|
|
5
|
+
Keywords: mock,rate-limiting,api,testing,openapi
|
|
6
|
+
Author: Stanislav Kosorin
|
|
7
|
+
Author-email: Stanislav Kosorin <stanokosorin4@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Framework :: FastAPI
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Topic :: Software Development :: Testing
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Requires-Dist: fastapi>=0.135.1
|
|
19
|
+
Requires-Dist: jsonref>=1.1.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0
|
|
21
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
22
|
+
Requires-Dist: uvicorn>=0.42.0
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Project-URL: Documentation, https://github.com/stano45/mocklimit#readme
|
|
25
|
+
Project-URL: Issues, https://github.com/stano45/mocklimit/issues
|
|
26
|
+
Project-URL: Repository, https://github.com/stano45/mocklimit
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# mocklimit
|
|
30
|
+
|
|
31
|
+
[CI](https://github.com/stano45/mocklimit/actions/workflows/ci.yml)
|
|
32
|
+
[PyPI](https://pypi.org/project/mocklimit/)
|
|
33
|
+
[mocklimit on PyPI](https://pypi.org/project/mocklimit/)
|
|
34
|
+
[License: MIT](LICENSE)
|
|
35
|
+
|
|
36
|
+
Configurable mock API server with realistic rate limiting for testing.
|
|
37
|
+
Point it at an OpenAPI spec, define rate limit policies in YAML, and get a
|
|
38
|
+
local server that behaves like a rate-limited production API, complete with
|
|
39
|
+
correct headers, 429 responses, and token usage estimation.
|
|
40
|
+
|
|
41
|
+
## Features
|
|
42
|
+
|
|
43
|
+
- **OpenAPI spec auto-routing** - parses your spec and registers all endpoints with dummy responses
|
|
44
|
+
- **Fixed window rate limiting** with sub-second precision
|
|
45
|
+
- **Quantized rate limiter** for aligned reset windows
|
|
46
|
+
- **Composite limits** - stack multiple limits per endpoint (e.g. RPM + TPM)
|
|
47
|
+
- **Provider-accurate headers** - configurable header names (`x-ratelimit-limit-requests`, etc.)
|
|
48
|
+
- **Token usage estimation** for LLM API mocking
|
|
49
|
+
- **Configurable response latency** simulation
|
|
50
|
+
- **Per-key scoping** by API key or IP address
|
|
51
|
+
- **Request statistics** via `/mocklimit/stats`
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install mocklimit
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Or with [uv](https://docs.astral.sh/uv/):
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uv add mocklimit
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Quick start
|
|
66
|
+
|
|
67
|
+
### 1. Create a rate limit config
|
|
68
|
+
|
|
69
|
+
```yaml
|
|
70
|
+
# limits.yaml
|
|
71
|
+
policies:
|
|
72
|
+
openai_chat:
|
|
73
|
+
strategy: fixed_window
|
|
74
|
+
limits:
|
|
75
|
+
- max_requests: 5
|
|
76
|
+
window_seconds: 60
|
|
77
|
+
scope: api_key
|
|
78
|
+
response_latency_ms: [0, 0]
|
|
79
|
+
headers:
|
|
80
|
+
limit: x-ratelimit-limit-requests
|
|
81
|
+
remaining: x-ratelimit-remaining-requests
|
|
82
|
+
reset: x-ratelimit-reset-requests
|
|
83
|
+
|
|
84
|
+
endpoints:
|
|
85
|
+
/chat/completions:
|
|
86
|
+
methods: [POST]
|
|
87
|
+
policy: openai_chat
|
|
88
|
+
token_estimation:
|
|
89
|
+
input: characters_div_4
|
|
90
|
+
output: [50, 500]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### 2. Start the server
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
mocklimit serve --spec openapi.yaml --rate-config limits.yaml
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The server reads your OpenAPI spec for route definitions and response schemas,
|
|
100
|
+
then applies rate limiting according to the config. Requests beyond the limit
|
|
101
|
+
get a `429` with appropriate `Retry-After` and rate limit headers.
|
|
102
|
+
|
|
103
|
+
### 3. Options
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
mocklimit serve --spec <path> --rate-config <path> [--host HOST] [--port PORT]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
| Flag | Default | Description |
|
|
110
|
+
|---|---|---|
|
|
111
|
+
| `--spec` | *(required)* | Path to OpenAPI spec (YAML) |
|
|
112
|
+
| `--rate-config` | *(required)* | Path to rate limit config (YAML) |
|
|
113
|
+
| `--host` | `127.0.0.1` | Host to bind to |
|
|
114
|
+
| `--port` | `8000` | Port to listen on |
|
|
115
|
+
|
|
116
|
+
## Rate limit config reference
|
|
117
|
+
|
|
118
|
+
### Policies
|
|
119
|
+
|
|
120
|
+
Each policy defines a rate limiting strategy:
|
|
121
|
+
|
|
122
|
+
| Field | Type | Description |
|
|
123
|
+
|---|---|---|
|
|
124
|
+
| `strategy` | `"fixed_window"` | Rate limiting algorithm |
|
|
125
|
+
| `limits` | list | One or more `{max_requests, window_seconds}` pairs |
|
|
126
|
+
| `scope` | `"api_key"` \| `"ip"` | How to identify clients |
|
|
127
|
+
| `response_latency_ms` | `[min, max]` | Simulated response delay range (ms) |
|
|
128
|
+
| `headers.limit` | string | Header name for the request limit |
|
|
129
|
+
| `headers.remaining` | string | Header name for remaining requests |
|
|
130
|
+
| `headers.reset` | string | Header name for reset time |
|
|
131
|
+
|
|
132
|
+
### Endpoints
|
|
133
|
+
|
|
134
|
+
Map API paths to policies:
|
|
135
|
+
|
|
136
|
+
| Field | Type | Description |
|
|
137
|
+
|---|---|---|
|
|
138
|
+
| `methods` | list of strings | HTTP methods to rate limit |
|
|
139
|
+
| `policy` | string | Name of the policy to apply |
|
|
140
|
+
| `token_estimation` | object (optional) | `{input: "characters_div_4", output: [min, max]}` |
|
|
141
|
+
|
|
142
|
+
## Programmatic usage
|
|
143
|
+
|
|
144
|
+
You can also embed the server directly in tests:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from mocklimit.server import create_app
|
|
148
|
+
|
|
149
|
+
app = create_app("openapi.yaml", "limits.yaml")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
This returns a standard FastAPI app that can be used with any ASGI test client.
|
|
153
|
+
|
|
154
|
+
## License
|
|
155
|
+
|
|
156
|
+
MIT
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
mocklimit/__init__.py,sha256=723dDOXsGEq_6G0IHOjkdTpbyNMEfkKYqpQiAZaICfA,102
|
|
2
|
+
mocklimit/__main__.py,sha256=3Q9oAalS7PCIDeddDFNsNCEI2ItvK0vbY_2mjWHN9e8,78
|
|
3
|
+
mocklimit/main.py,sha256=SxBcqZdv0WRrCq3QAh0ty2xHE3PPLBNOakGxsCXkcEA,1389
|
|
4
|
+
mocklimit/openapi/__init__.py,sha256=NGzkIx1eDEYWU85IQg74YVN7oWX1dTlQ6DSyZXy_2K8,293
|
|
5
|
+
mocklimit/openapi/models.py,sha256=t0WXt-polAx8bGvjV2x2bnA2GnZcAwETOja0dkeHSOQ,426
|
|
6
|
+
mocklimit/openapi/parser.py,sha256=yw5UMr0EHqyB9hdDK-zXQOLfh5_Vj8CsCX24Hkaltw4,2753
|
|
7
|
+
mocklimit/openapi/response_generator.py,sha256=6zdSTPxPbAlYAWfGV0TnEbaI_SjwuyQZyyzJ0LtGeww,6311
|
|
8
|
+
mocklimit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
mocklimit/ratelimit/__init__.py,sha256=BP8Uee18o4d4ZOqdRQ84CuJ88EhpCQ7J2BBs3TZ9c8g,340
|
|
10
|
+
mocklimit/ratelimit/composite.py,sha256=F4_6yYuVTu1W5glzS4merXjupUUCLi8xvsy48XcPU34,2367
|
|
11
|
+
mocklimit/ratelimit/fixed_window.py,sha256=KUqUQOza9yQNwnvT2WrZ5ppraV4SDpu2HXoMvioOcpw,3337
|
|
12
|
+
mocklimit/ratelimit/models.py,sha256=XVr7NZf-dJeuobnP8laeJMNfeCg_0BiQGFqJuC3NI9A,595
|
|
13
|
+
mocklimit/ratelimit/quantized.py,sha256=x1pnPJjZ1HkhKzsratbLTyS-6dMj0clnuMysWSKZzwU,3555
|
|
14
|
+
mocklimit/server/__init__.py,sha256=vRyy3RQ1u1cumfdLbLs6yOSdpUrRShz8UHcljjQr1s8,132
|
|
15
|
+
mocklimit/server/app.py,sha256=HmWPPyWuvLEFDgq7yyTRsjgNRm7on7FfA8AxmQEUvyo,8503
|
|
16
|
+
mocklimit/server/config.py,sha256=enGTVAA0QvIfTRSp8cBo7PYdqhVuy3Dd6t9qTtf2YHM,1555
|
|
17
|
+
mocklimit/server/stats.py,sha256=GIOF9clvGQFemDWEmrHLZbfKlz3BkyKvuZopwJMagRc,2095
|
|
18
|
+
mocklimit-0.1.0.dist-info/licenses/LICENSE,sha256=7B4MJojwUbjwMYF83vyzgFOZ2kxtwELEC5vTHvfvYmg,1074
|
|
19
|
+
mocklimit-0.1.0.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
|
|
20
|
+
mocklimit-0.1.0.dist-info/entry_points.txt,sha256=MZ637eliIcbeWlDP_KGRuj8zRzoflPHvW25SMDXNSQ0,51
|
|
21
|
+
mocklimit-0.1.0.dist-info/METADATA,sha256=bLdDn4KnZ0ziCuCDz9A5nwCLeOxY3SDMQC6uWtl_br8,4935
|
|
22
|
+
mocklimit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Stanislav Kosorin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|