mcp-backpressure 0.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 nulone
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,220 @@
+ Metadata-Version: 2.4
+ Name: mcp-backpressure
+ Version: 0.1.2
+ Summary: Backpressure/concurrency control middleware for FastMCP MCP servers
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: fastmcp>=2.9.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=8.0; extra == "dev"
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
+ Requires-Dist: ruff>=0.4; extra == "dev"
+ Requires-Dist: mypy>=1.10; extra == "dev"
+ Dynamic: license-file
+
+ # mcp-backpressure
+
+ Backpressure and concurrency control middleware for [FastMCP](https://github.com/jlowin/fastmcp) MCP servers.
+
+ **Problem:** LLMs can generate hundreds of parallel tool calls, exhausting resources and crashing servers while giving clients no structured feedback about when to retry.
+
+ **Solution:** Middleware that limits concurrent executions, queues excess requests with a timeout, and returns structured JSON-RPC overload errors.
+
+ ## Quickstart
+
+ ```python
+ from fastmcp import FastMCP
+ from mcp_backpressure import BackpressureMiddleware
+
+ mcp = FastMCP("MyServer")
+ mcp.add_middleware(BackpressureMiddleware(
+     max_concurrent=5,      # Max parallel executions
+     queue_size=10,         # Bounded queue for waiting requests
+     queue_timeout=30.0,    # Queue wait timeout (seconds)
+ ))
+ ```
+
+ ## Installation
+
+ ```bash
+ pip install mcp-backpressure
+ ```
+
+ ## Features
+
+ - **Concurrency limiting**: Semaphore-based control of parallel executions
+ - **Bounded queue**: Optional FIFO queue with configurable size
+ - **Queue timeout**: Automatic timeout for queued requests, with cleanup
+ - **Structured errors**: JSON-RPC compliant overload errors with detailed metrics
+ - **Metrics**: Real-time counters for active, queued, and rejected requests
+ - **Callback hook**: Optional notification on each overload event
+ - **Minimal dependencies**: Only FastMCP is required (Python 3.10+)
+
+ ## Usage
+
+ ### Basic Configuration
+
+ ```python
+ from mcp_backpressure import BackpressureMiddleware
+
+ mcp.add_middleware(BackpressureMiddleware(
+     max_concurrent=5,             # Required: max parallel tool executions
+     queue_size=10,                # Optional: bounded queue (0 = no queue)
+     queue_timeout=30.0,           # Optional: seconds to wait in queue
+     overload_error_code=-32001,   # Optional: JSON-RPC error code
+     on_overload=callback,         # Optional: called on each overload
+ ))
+ ```
+
+ ### Parameters
+
+ | Parameter | Type | Default | Description |
+ |-----------|------|---------|-------------|
+ | `max_concurrent` | `int` | **required** | Maximum number of concurrent tool executions. Must be >= 1. |
+ | `queue_size` | `int` | `0` | Maximum queue size for waiting requests. Set to 0 to reject immediately when the limit is reached. |
+ | `queue_timeout` | `float` | `30.0` | Maximum time (seconds) a request can wait in the queue before timing out. Must be > 0. |
+ | `overload_error_code` | `int` | `-32001` | JSON-RPC error code returned when the server is overloaded. |
+ | `on_overload` | `Callable` | `None` | Optional callback `(error: OverloadError) -> None` invoked on each overload. |
+
+ ### Error Handling
+
+ When the server is overloaded, requests are rejected with a structured JSON-RPC error:
+
+ ```json
+ {
+   "code": -32001,
+   "message": "SERVER_OVERLOADED",
+   "data": {
+     "reason": "queue_full",
+     "active": 5,
+     "queued": 10,
+     "max_concurrent": 5,
+     "queue_size": 10,
+     "queue_timeout_ms": 30000,
+     "retry_after_ms": 1000
+   }
+ }
+ ```
+
+ #### Overload Reasons
+
+ | Reason | Description |
+ |--------|-------------|
+ | `concurrency_limit` | All execution slots are full and no queue is configured (queue_size=0) |
+ | `queue_full` | All execution slots and queue slots are full |
+ | `queue_timeout` | The request waited in the queue longer than `queue_timeout` |
+
+ ### Metrics
+
+ Get real-time metrics from the middleware:
+
+ ```python
+ metrics = middleware.get_metrics()  # Synchronous
+
+ print(f"Active: {metrics.active}")
+ print(f"Queued: {metrics.queued}")
+ print(f"Total rejected: {metrics.total_rejected}")
+ print(f"Rejected (concurrency): {metrics.rejected_concurrency_limit}")
+ print(f"Rejected (queue full): {metrics.rejected_queue_full}")
+ print(f"Rejected (timeout): {metrics.rejected_queue_timeout}")
+ ```
+
+ For async contexts, use `await middleware.get_metrics_async()`.
+
+ ### Callback Hook
+
+ Register a callback to be notified of each overload event:
+
+ ```python
+ from mcp_backpressure import BackpressureMiddleware, OverloadError
+
+ def on_overload(error: OverloadError):
+     print(f"OVERLOAD: {error.reason} (active={error.active})")
+     # Log to monitoring system, update metrics, etc.
+
+ middleware = BackpressureMiddleware(
+     max_concurrent=5,
+     queue_size=10,
+     on_overload=on_overload,
+ )
+ ```
+
+ ## Examples
+
+ ### Simple Server
+
+ See [examples/simple_server.py](examples/simple_server.py) for a minimal FastMCP server with backpressure.
+
+ ### Load Simulation
+
+ Run [examples/load_simulation.py](examples/load_simulation.py) to see backpressure behavior under heavy concurrent load:
+
+ ```bash
+ python examples/load_simulation.py
+ ```
+
+ This simulates 30 concurrent requests against a server limited to 5 concurrent executions with a queue of 10, demonstrating how the middleware handles overload.
+
+ ## How It Works
+
+ The middleware provides two-level limiting:
+
+ 1. **Semaphore** (max_concurrent): Controls active executions
+ 2. **Bounded queue** (queue_size): Holds waiting requests, with a timeout
+
+ **Request flow:**
+ - If an execution slot is available → execute immediately
+ - If all execution slots are full and the queue is not full → wait in the queue, with a timeout
+ - If the queue is full → reject with `queue_full`
+ - If the queue wait times out → reject with `queue_timeout`
+
+ **Invariants** (guaranteed under all conditions):
+ - `active <= max_concurrent` at all times
+ - `queued <= queue_size` at all times
+ - Cancellation correctly frees slots and decrements counters
+ - A queue timeout removes the request from the queue
+
+ ## Development
+
+ ### Running Tests
+
+ ```bash
+ python -m pytest tests/ -v
+ ```
+
+ ### Linting
+
+ ```bash
+ ruff check src/ tests/
+ ```
+
+ ## Design Rationale
+
+ This library emerged from [python-sdk #1698](https://github.com/modelcontextprotocol/python-sdk/issues/1698) (closed as "not planned"). Key design decisions:
+
+ - **Global limits only** (v0.1): Per-client and per-tool limits are deferred to v0.2+
+ - **Simple counters**: No Prometheus/OTEL dependencies by default
+ - **JSON-RPC errors**: Follows MCP protocol conventions
+ - **Monotonic time**: Queue timeouts use `time.monotonic()` for reliability
+
+ ## License
+
+ MIT
+
+ ## Contributing
+
+ Contributions welcome! Please open an issue before submitting PRs.
+
+ ## Changelog
+
+ See [CHANGELOG.md](https://github.com/nulone/mcp-backpressure/blob/main/CHANGELOG.md)
@@ -0,0 +1,194 @@
+ # mcp-backpressure
+
+ Backpressure and concurrency control middleware for [FastMCP](https://github.com/jlowin/fastmcp) MCP servers.
+
+ **Problem:** LLMs can generate hundreds of parallel tool calls, exhausting resources and crashing servers while giving clients no structured feedback about when to retry.
+
+ **Solution:** Middleware that limits concurrent executions, queues excess requests with a timeout, and returns structured JSON-RPC overload errors.
+
+ ## Quickstart
+
+ ```python
+ from fastmcp import FastMCP
+ from mcp_backpressure import BackpressureMiddleware
+
+ mcp = FastMCP("MyServer")
+ mcp.add_middleware(BackpressureMiddleware(
+     max_concurrent=5,      # Max parallel executions
+     queue_size=10,         # Bounded queue for waiting requests
+     queue_timeout=30.0,    # Queue wait timeout (seconds)
+ ))
+ ```
+
+ ## Installation
+
+ ```bash
+ pip install mcp-backpressure
+ ```
+
+ ## Features
+
+ - **Concurrency limiting**: Semaphore-based control of parallel executions
+ - **Bounded queue**: Optional FIFO queue with configurable size
+ - **Queue timeout**: Automatic timeout for queued requests, with cleanup
+ - **Structured errors**: JSON-RPC compliant overload errors with detailed metrics
+ - **Metrics**: Real-time counters for active, queued, and rejected requests
+ - **Callback hook**: Optional notification on each overload event
+ - **Minimal dependencies**: Only FastMCP is required (Python 3.10+)
+
+ ## Usage
+
+ ### Basic Configuration
+
+ ```python
+ from mcp_backpressure import BackpressureMiddleware
+
+ mcp.add_middleware(BackpressureMiddleware(
+     max_concurrent=5,             # Required: max parallel tool executions
+     queue_size=10,                # Optional: bounded queue (0 = no queue)
+     queue_timeout=30.0,           # Optional: seconds to wait in queue
+     overload_error_code=-32001,   # Optional: JSON-RPC error code
+     on_overload=callback,         # Optional: called on each overload
+ ))
+ ```
+
+ ### Parameters
+
+ | Parameter | Type | Default | Description |
+ |-----------|------|---------|-------------|
+ | `max_concurrent` | `int` | **required** | Maximum number of concurrent tool executions. Must be >= 1. |
+ | `queue_size` | `int` | `0` | Maximum queue size for waiting requests. Set to 0 to reject immediately when the limit is reached. |
+ | `queue_timeout` | `float` | `30.0` | Maximum time (seconds) a request can wait in the queue before timing out. Must be > 0. |
+ | `overload_error_code` | `int` | `-32001` | JSON-RPC error code returned when the server is overloaded. |
+ | `on_overload` | `Callable` | `None` | Optional callback `(error: OverloadError) -> None` invoked on each overload. |
+
+ ### Error Handling
+
+ When the server is overloaded, requests are rejected with a structured JSON-RPC error (a client-side retry sketch follows the reasons table below):
+
+ ```json
+ {
+   "code": -32001,
+   "message": "SERVER_OVERLOADED",
+   "data": {
+     "reason": "queue_full",
+     "active": 5,
+     "queued": 10,
+     "max_concurrent": 5,
+     "queue_size": 10,
+     "queue_timeout_ms": 30000,
+     "retry_after_ms": 1000
+   }
+ }
+ ```
+
+ #### Overload Reasons
+
+ | Reason | Description |
+ |--------|-------------|
+ | `concurrency_limit` | All execution slots are full and no queue is configured (queue_size=0) |
+ | `queue_full` | All execution slots and queue slots are full |
+ | `queue_timeout` | The request waited in the queue longer than `queue_timeout` |
+
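+ For illustration only, here is a hypothetical client-side helper (not part of this package) that reads the error payload above and backs off using `retry_after_ms`; the transport call is assumed to return the raw JSON-RPC response as a dict:
+
+ ```python
+ import asyncio
+ from typing import Any, Awaitable, Callable
+
+ # Hypothetical sketch: retry a JSON-RPC call while the server reports
+ # SERVER_OVERLOADED, honoring the retry_after_ms hint from the error data.
+ async def call_with_backoff(
+     call: Callable[[], Awaitable[dict[str, Any]]],
+     max_attempts: int = 5,
+ ) -> dict[str, Any]:
+     for attempt in range(1, max_attempts + 1):
+         response = await call()
+         error = response.get("error")
+         if not error or error.get("message") != "SERVER_OVERLOADED":
+             return response  # success, or an error that retrying will not fix
+         delay_ms = error.get("data", {}).get("retry_after_ms", 1000)
+         # Back off a little more on each attempt, capped at 30 seconds.
+         await asyncio.sleep(min(delay_ms / 1000 * attempt, 30.0))
+     raise RuntimeError("server still overloaded after retries")
+ ```
+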
+ ### Metrics
+
+ Get real-time metrics from the middleware:
+
+ ```python
+ metrics = middleware.get_metrics()  # Synchronous
+
+ print(f"Active: {metrics.active}")
+ print(f"Queued: {metrics.queued}")
+ print(f"Total rejected: {metrics.total_rejected}")
+ print(f"Rejected (concurrency): {metrics.rejected_concurrency_limit}")
+ print(f"Rejected (queue full): {metrics.rejected_queue_full}")
+ print(f"Rejected (timeout): {metrics.rejected_queue_timeout}")
+ ```
+
+ For async contexts, use `await middleware.get_metrics_async()`.
+
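+ As a small illustrative sketch (assuming only the documented `get_metrics_async()` call and the counter fields shown above), a background task could periodically log these counters:
+
+ ```python
+ import asyncio
+
+ # Hypothetical monitoring loop: log the middleware's counters every few seconds.
+ async def log_metrics_forever(middleware, interval: float = 10.0) -> None:
+     while True:
+         m = await middleware.get_metrics_async()
+         print(f"active={m.active} queued={m.queued} rejected={m.total_rejected}")
+         await asyncio.sleep(interval)
+ ```
+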
+ ### Callback Hook
+
+ Register a callback to be notified of each overload event:
+
+ ```python
+ from mcp_backpressure import BackpressureMiddleware, OverloadError
+
+ def on_overload(error: OverloadError):
+     print(f"OVERLOAD: {error.reason} (active={error.active})")
+     # Log to monitoring system, update metrics, etc.
+
+ middleware = BackpressureMiddleware(
+     max_concurrent=5,
+     queue_size=10,
+     on_overload=on_overload,
+ )
+ ```
+
+ ## Examples
+
+ ### Simple Server
+
+ See [examples/simple_server.py](examples/simple_server.py) for a minimal FastMCP server with backpressure.
+
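+ A minimal server along those lines might look like the following sketch (an approximation for illustration; the shipped example file may differ):
+
+ ```python
+ import asyncio
+
+ from fastmcp import FastMCP
+ from mcp_backpressure import BackpressureMiddleware
+
+ mcp = FastMCP("BackpressureDemo")
+ mcp.add_middleware(BackpressureMiddleware(
+     max_concurrent=5,
+     queue_size=10,
+     queue_timeout=30.0,
+ ))
+
+ @mcp.tool()
+ async def slow_echo(text: str) -> str:
+     """Echo the input after a short delay, to make concurrency visible."""
+     await asyncio.sleep(1.0)
+     return text
+
+ if __name__ == "__main__":
+     mcp.run()
+ ```
+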
+ ### Load Simulation
+
+ Run [examples/load_simulation.py](examples/load_simulation.py) to see backpressure behavior under heavy concurrent load:
+
+ ```bash
+ python examples/load_simulation.py
+ ```
+
+ This simulates 30 concurrent requests against a server limited to 5 concurrent executions with a queue of 10, demonstrating how the middleware handles overload.
+
+ ## How It Works
+
+ The middleware provides two-level limiting (a simplified sketch of the admission logic appears at the end of this section):
+
+ 1. **Semaphore** (max_concurrent): Controls active executions
+ 2. **Bounded queue** (queue_size): Holds waiting requests, with a timeout
+
+ **Request flow:**
+ - If an execution slot is available → execute immediately
+ - If all execution slots are full and the queue is not full → wait in the queue, with a timeout
+ - If the queue is full → reject with `queue_full`
+ - If the queue wait times out → reject with `queue_timeout`
+
+ **Invariants** (guaranteed under all conditions):
+ - `active <= max_concurrent` at all times
+ - `queued <= queue_size` at all times
+ - Cancellation correctly frees slots and decrements counters
+ - A queue timeout removes the request from the queue
+
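+ The sketch below approximates this admission logic in plain asyncio (illustrative only, with simplified bookkeeping; the actual middleware integrates with FastMCP's middleware hooks, raises `OverloadError`, and tracks metrics):
+
+ ```python
+ import asyncio
+
+ # Simplified stand-in for the middleware's admission decisions:
+ # execute immediately, wait for a slot with a timeout, or reject.
+ class AdmissionSketch:
+     def __init__(self, max_concurrent: int, queue_size: int = 0, queue_timeout: float = 30.0):
+         self._slots = asyncio.Semaphore(max_concurrent)
+         self._queue_size = queue_size
+         self._queue_timeout = queue_timeout
+         self._queued = 0  # requests currently waiting for a slot
+
+     async def run(self, func, *args, **kwargs):
+         if self._slots.locked():                          # all execution slots busy
+             if self._queue_size == 0:
+                 raise RuntimeError("concurrency_limit")   # no queue configured
+             if self._queued >= self._queue_size:
+                 raise RuntimeError("queue_full")
+             self._queued += 1
+             try:
+                 # Bounded wait for a slot; timing out maps to queue_timeout.
+                 await asyncio.wait_for(self._slots.acquire(), self._queue_timeout)
+             except asyncio.TimeoutError:
+                 raise RuntimeError("queue_timeout") from None
+             finally:
+                 self._queued -= 1
+         else:
+             await self._slots.acquire()
+         try:
+             return await func(*args, **kwargs)
+         finally:
+             self._slots.release()
+ ```
+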
+ ## Development
+
+ ### Running Tests
+
+ ```bash
+ python -m pytest tests/ -v
+ ```
+
+ ### Linting
+
+ ```bash
+ ruff check src/ tests/
+ ```
+
+ ## Design Rationale
+
+ This library emerged from [python-sdk #1698](https://github.com/modelcontextprotocol/python-sdk/issues/1698) (closed as "not planned"). Key design decisions:
+
+ - **Global limits only** (v0.1): Per-client and per-tool limits are deferred to v0.2+
+ - **Simple counters**: No Prometheus/OTEL dependencies by default
+ - **JSON-RPC errors**: Follows MCP protocol conventions
+ - **Monotonic time**: Queue timeouts use `time.monotonic()` for reliability (see the short sketch below)
+
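+ To illustrate the last point, a deadline computed with `time.monotonic()` is unaffected by wall-clock adjustments (the values below are examples only):
+
+ ```python
+ import time
+
+ queue_timeout = 30.0                         # seconds, example value
+ deadline = time.monotonic() + queue_timeout
+
+ # ...later, while the request is still waiting for a slot...
+ remaining = deadline - time.monotonic()
+ if remaining <= 0:
+     print("reject with reason=queue_timeout")
+ ```
+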
+ ## License
+
+ MIT
+
+ ## Contributing
+
+ Contributions welcome! Please open an issue before submitting PRs.
+
+ ## Changelog
+
+ See [CHANGELOG.md](https://github.com/nulone/mcp-backpressure/blob/main/CHANGELOG.md)
@@ -0,0 +1,83 @@
+ [build-system]
+ requires = [
+     "setuptools>=68.0",
+     "wheel",
+ ]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "mcp-backpressure"
+ version = "0.1.2"
+ description = "Backpressure/concurrency control middleware for FastMCP MCP servers"
+ readme = "README.md"
+ requires-python = ">=3.10"
+ classifiers = [
+     "Development Status :: 3 - Alpha",
+     "Intended Audience :: Developers",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3.10",
+     "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+ ]
+ dependencies = [
+     "fastmcp>=2.9.0",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pytest>=8.0",
+     "pytest-asyncio>=0.23",
+     "pytest-cov>=5.0",
+     "ruff>=0.4",
+     "mypy>=1.10",
+ ]
+
+ [tool.setuptools.packages.find]
+ where = [
+     "src",
+ ]
+
+ [tool.pytest.ini_options]
+ testpaths = [
+     "tests",
+ ]
+ python_files = [
+     "test_*.py",
+ ]
+ python_classes = [
+     "Test*",
+ ]
+ python_functions = [
+     "test_*",
+ ]
+ addopts = "-v --strict-markers --tb=short"
+ asyncio_mode = "auto"
+ asyncio_default_fixture_loop_scope = "function"
+
+ [tool.ruff]
+ line-length = 100
+ target-version = "py310"
+
+ [tool.ruff.lint]
+ select = [
+     "E",
+     "W",
+     "F",
+     "I",
+     "B",
+     "C4",
+     "UP",
+ ]
+ ignore = [
+     "E501",
+ ]
+
+ [tool.mypy]
+ python_version = "3.10"
+ strict = true
+ warn_return_any = true
+ warn_unused_configs = true
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,13 @@
+ """Backpressure/concurrency control middleware for FastMCP MCP servers"""
+
+ __version__ = "0.1.2"
+
+ from .errors import OverloadError
+ from .metrics import BackpressureMetrics
+ from .middleware import BackpressureMiddleware
+
+ __all__ = [
+     "BackpressureMiddleware",
+     "BackpressureMetrics",
+     "OverloadError",
+ ]
@@ -0,0 +1,75 @@
+ """Error classes for mcp-backpressure middleware"""
+
+ from typing import Any
+
+
+ class OverloadError(Exception):
+     """
+     Raised when the server is overloaded and cannot accept more requests.
+
+     Follows JSON-RPC error format for the MCP protocol.
+     """
+
+     def __init__(
+         self,
+         reason: str,
+         active: int,
+         max_concurrent: int,
+         code: int = -32001,
+         message: str = "SERVER_OVERLOADED",
+         queued: int = 0,
+         queue_size: int = 0,
+         queue_timeout_ms: int = 0,
+         retry_after_ms: int = 1000,
+     ):
+         """
+         Create an OverloadError.
+
+         Args:
+             reason: Overload reason ('concurrency_limit', 'queue_full', 'queue_timeout')
+             active: Number of currently active requests
+             max_concurrent: Maximum allowed concurrent requests
+             code: JSON-RPC error code (default: -32001)
+             message: Error message (default: 'SERVER_OVERLOADED')
+             queued: Number of requests in queue (default: 0)
+             queue_size: Maximum queue size (default: 0)
+             queue_timeout_ms: Queue timeout in milliseconds (default: 0)
+             retry_after_ms: Suggested retry delay in milliseconds (default: 1000)
+         """
+         self.code = code
+         self.message = message
+         self.reason = reason
+         self.active = active
+         self.max_concurrent = max_concurrent
+         self.queued = queued
+         self.queue_size = queue_size
+         self.queue_timeout_ms = queue_timeout_ms
+         self.retry_after_ms = retry_after_ms
+
+         super().__init__(f"{message}: {reason}")
+
+     @property
+     def data(self) -> dict[str, Any]:
+         """Get error data payload."""
+         return {
+             "reason": self.reason,
+             "active": self.active,
+             "queued": self.queued,
+             "max_concurrent": self.max_concurrent,
+             "queue_size": self.queue_size,
+             "queue_timeout_ms": self.queue_timeout_ms,
+             "retry_after_ms": self.retry_after_ms,
+         }
+
+     def to_json_rpc(self) -> dict[str, Any]:
+         """
+         Convert to JSON-RPC error object.
+
+         Returns:
+             dict with 'code', 'message', and 'data' keys
+         """
+         return {
+             "code": self.code,
+             "message": self.message,
+             "data": self.data,
+         }