mcp-sieve 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ venv/
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .pytest_cache/
9
+ .env
10
+ *.log
11
+
12
+ # Real config with local paths — use config.example.yaml as template
13
+ config.yaml
@@ -0,0 +1,5 @@
1
+ # CLAUDE.md
2
+
3
+ Для операций с Git, веб-скрапинга и работы с файлами всегда используй инструменты mcp-sieve через mcp_router_call, так как они возвращают структурированный и чистый вывод.
4
+
5
+ Для удобного поиска по репозиториям есть Gitmcp.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 dimarch0x
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,206 @@
1
+ Metadata-Version: 2.4
2
+ Name: mcp-sieve
3
+ Version: 0.1.0
4
+ Summary: MCP Sieve — semantic tool selection proxy for LLMs. Aggregates downstream MCP servers into 2 static tools (select + call) via Ollama embeddings.
5
+ Project-URL: Homepage, https://github.com/dimarch0x/mcp-sieve
6
+ Project-URL: Repository, https://github.com/dimarch0x/mcp-sieve
7
+ Project-URL: Issues, https://github.com/dimarch0x/mcp-sieve/issues
8
+ Author: dimarch0x
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 dimarch0x
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: llm,mcp,mcp-server,ollama,semantic-routing,tool-selection
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Programming Language :: Python :: 3.13
38
+ Classifier: Topic :: Software Development :: Libraries
39
+ Requires-Python: >=3.11
40
+ Requires-Dist: httpx>=0.27.0
41
+ Requires-Dist: mcp>=1.0.0
42
+ Requires-Dist: numpy>=1.26.0
43
+ Requires-Dist: pyyaml>=6.0
44
+ Description-Content-Type: text/markdown
45
+
46
+ # MCP Sieve
47
+
48
+ A semantic proxy for MCP servers. Solves **tool selection degradation** — when an LLM has too many tools, it picks the wrong ones.
49
+
50
+ The sieve sits between the client (Claude Code, Hermes, any MCP client) and downstream MCP servers. The client sees **2 tools** instead of dozens: `mcp_router_select` + `mcp_router_call`. The first finds relevant tools via embeddings, the second proxies the call.
51
+
52
+ ## How it works
53
+
54
+ ```
55
+ Client (Claude Code / Hermes)
56
+ ↓ sees only 2 tools
57
+ mcp_router_select(task="...") → embeddings → top-N relevant tools
58
+ mcp_router_call(tool_name, arguments) → proxies to downstream
59
+
60
+ downstream MCP servers (time, fetch, git, arxiv, playwright, ...)
61
+ ```
62
+
63
+ **Two call paths:**
64
+
65
+ 1. **Path 1 (notifications/tools/list_changed):** `select` finds tools → sieve updates `tools/list` → client calls tools directly. Works with clients that support dynamic toolset updates.
66
+
67
+ 2. **Path 2 (mcp_router_call proxy):** for clients with a **frozen toolset** (Hermes, Claude Code with prompt caching). `select` returns tools with `inputSchema` → `call` proxies execution. No `/reset` needed when new downstream tools are discovered.
68
+
69
+ ## Install
70
+
71
+ ```bash
72
+ git clone https://github.com/dimarch0x/mcp-sieve ~/Projects/mcp-sieve
73
+ cd ~/Projects/mcp-sieve
74
+ uv pip install -e .
75
+ ```
76
+
77
+ Requires [Ollama](https://ollama.com) with an embed model:
78
+ ```bash
79
+ ollama pull nomic-embed-text
80
+ ```
81
+
82
+ ## Quick start
83
+
84
+ Copy the example config and edit it:
85
+ ```bash
86
+ cp config.example.yaml config.yaml
87
+ # edit config.yaml — add your downstream servers and paths
88
+ ```
89
+
90
+ Run standalone:
91
+ ```bash
92
+ python -m mcp_router.server
93
+ ```
94
+ Server listens on stdio (JSON-RPC).
95
+
96
+ ## Connect to Claude Code
97
+
98
+ In `~/.claude.json` → `projects["<path>"].mcpServers`:
99
+
100
+ ```json
101
+ "sieve": {
102
+ "type": "stdio",
103
+ "command": "uvx",
104
+ "args": ["--from", "/path/to/mcp-sieve", "mcp-sieve"],
105
+ "env": {
106
+ "MCP_ROUTER_CONFIG": "/path/to/mcp-sieve/config.yaml"
107
+ }
108
+ }
109
+ ```
110
+
111
+ Or via CLI:
112
+ ```bash
113
+ claude mcp add sieve -- uvx --from /path/to/mcp-sieve mcp-sieve
114
+ ```
115
+
116
+ > **Windows:** `MCP_ROUTER_CONFIG` is required — `uvx` installs the package into an isolated venv, `__file__` points into uv-cache. See [Windows notes](#windows-notes) below.
117
+
118
+ ## Connect to Hermes
119
+
120
+ ```bash
121
+ hermes mcp add sieve --command uvx --args "--from" --args "/path/to/mcp-sieve" --args "mcp-sieve"
122
+ hermes mcp test sieve
123
+ # /reset in chat
124
+ ```
125
+
126
+ ## Config
127
+
128
+ `config.yaml` (see `config.example.yaml` for a full template):
129
+
130
+ ```yaml
131
+ downstream:
132
+ - name: time
133
+ command: uvx
134
+ args: ["mcp-server-time"]
135
+
136
+ - name: fetch
137
+ command: uvx
138
+ args: ["mcp-server-fetch"]
139
+
140
+ - name: git
141
+ command: uvx
142
+ args: ["mcp-server-git", "--repository", "/path/to/your/repo"]
143
+
144
+ # Windows: npx is a .cmd file — needs cmd /c
145
+ - name: context7
146
+ command: cmd
147
+ args: ["/c", "npx", "-y", "@upstash/context7-mcp@latest"]
148
+
149
+ - name: filesystem
150
+ command: cmd
151
+ args: ["/c", "npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"]
152
+
153
+ # Remote MCP over HTTP (streamable) or SSE — no local process.
154
+ # transport defaults to stdio; a bare url implies http.
155
+ - name: gitmcp
156
+ transport: http
157
+ url: "https://gitmcp.io/docs"
158
+
159
+ embeddings:
160
+ ollama_url: "http://127.0.0.1:11434/api/embeddings"
161
+ model: "nomic-embed-text"
162
+ top_n: 10
163
+ ```
164
+
165
+ Env variables:
166
+ - `MCP_ROUTER_CONFIG` — path to `config.yaml` (otherwise looks in CWD or next to source)
167
+ - `MCP_SIEVE_DOWNSTREAM_<N>_NAME` / `_COMMAND` / `_ARGS` / `_URL` / `_TRANSPORT` — define downstream servers without a file (Docker/k8s). `N` starts at 1, stops at the first gap. `_ARGS` is a JSON array or whitespace-split. A same-named entry overrides the yaml one.
168
+ - `MCP_SIEVE_OLLAMA_URL` / `MCP_SIEVE_EMBED_MODEL` / `MCP_SIEVE_TOP_N` — embeddings overrides
169
+
170
+ Crashed downstream servers (Ollama, npx) auto-reconnect with exponential backoff — no restart needed.
171
+
172
+ ## Windows notes
173
+
174
+ 1. **npx → `cmd /c npx`:** `npx` is a `.cmd` file, Python subprocess (MCP SDK) can't find it without a shell. `uvx` is a real binary, works directly.
175
+
176
+ 2. **uvx --from and dependencies:** `uvx --from <project>` installs the package into an isolated uv-cache venv. All imports must be in `pyproject.toml` `[project.dependencies]` — implicit deps from the dev env won't be picked up.
177
+
178
+ 3. **uv cache clean:** if the cache is locked (`os error 32`), kill MCP server processes first:
179
+ ```bash
180
+ powershell -Command "Get-Process | Where-Object { $_.ProcessName -match 'mcp|uv' } | Stop-Process -Force"
181
+ uv cache clean --force
182
+ ```
183
+
184
+ 4. **Debug connection failures:** `claude --debug` writes to `~/.claude/debug/<session>.txt`. Grep `Server stderr:` for real server tracebacks.
185
+
186
+ ## Stack
187
+
188
+ - **MCP Python SDK** (`mcp`) — stdio + HTTP/SSE transports, `notifications/tools/list_changed`
189
+ - **Ollama** — local embeddings (`nomic-embed-text`), free
190
+ - **numpy** — cosine similarity
191
+ - **httpx** — HTTP client for Ollama API
192
+
193
+ ## Fallback
194
+
195
+ If Ollama is unavailable — the sieve doesn't crash. `mcp_router_select` returns **all** downstream tools without ranking, with a warning in the response.
196
+
197
+ ## Performance
198
+
199
+ Tested with 9 downstream servers (74 tools):
200
+ - `mcp_router_select`: 83–166ms
201
+ - `mcp_router_call`: 15–774ms (longest: playwright browser navigation)
202
+ - Startup: ~16s (all 9 downstream connect + 74 embeddings built)
203
+
204
+ ## Status
205
+
206
+ Working end-to-end in Claude Code and Hermes. See `TASKS.md` for the roadmap and benchmark results.
@@ -0,0 +1,161 @@
1
+ # MCP Sieve
2
+
3
+ A semantic proxy for MCP servers. Solves **tool selection degradation** — when an LLM has too many tools, it picks the wrong ones.
4
+
5
+ The sieve sits between the client (Claude Code, Hermes, any MCP client) and downstream MCP servers. The client sees **2 tools** instead of dozens: `mcp_router_select` + `mcp_router_call`. The first finds relevant tools via embeddings, the second proxies the call.
6
+
7
+ ## How it works
8
+
9
+ ```
10
+ Client (Claude Code / Hermes)
11
+ ↓ sees only 2 tools
12
+ mcp_router_select(task="...") → embeddings → top-N relevant tools
13
+ mcp_router_call(tool_name, arguments) → proxies to downstream
14
+
15
+ downstream MCP servers (time, fetch, git, arxiv, playwright, ...)
16
+ ```
17
+
18
+ **Two call paths:**
19
+
20
+ 1. **Path 1 (notifications/tools/list_changed):** `select` finds tools → sieve updates `tools/list` → client calls tools directly. Works with clients that support dynamic toolset updates.
21
+
22
+ 2. **Path 2 (mcp_router_call proxy):** for clients with a **frozen toolset** (Hermes, Claude Code with prompt caching). `select` returns tools with `inputSchema` → `call` proxies execution. No `/reset` needed when new downstream tools are discovered.
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ git clone https://github.com/dimarch0x/mcp-sieve ~/Projects/mcp-sieve
28
+ cd ~/Projects/mcp-sieve
29
+ uv pip install -e .
30
+ ```
31
+
32
+ Requires [Ollama](https://ollama.com) with an embed model:
33
+ ```bash
34
+ ollama pull nomic-embed-text
35
+ ```
36
+
37
+ ## Quick start
38
+
39
+ Copy the example config and edit it:
40
+ ```bash
41
+ cp config.example.yaml config.yaml
42
+ # edit config.yaml — add your downstream servers and paths
43
+ ```
44
+
45
+ Run standalone:
46
+ ```bash
47
+ python -m mcp_router.server
48
+ ```
49
+ Server listens on stdio (JSON-RPC).
50
+
51
+ ## Connect to Claude Code
52
+
53
+ In `~/.claude.json` → `projects["<path>"].mcpServers`:
54
+
55
+ ```json
56
+ "sieve": {
57
+ "type": "stdio",
58
+ "command": "uvx",
59
+ "args": ["--from", "/path/to/mcp-sieve", "mcp-sieve"],
60
+ "env": {
61
+ "MCP_ROUTER_CONFIG": "/path/to/mcp-sieve/config.yaml"
62
+ }
63
+ }
64
+ ```
65
+
66
+ Or via CLI:
67
+ ```bash
68
+ claude mcp add sieve -- uvx --from /path/to/mcp-sieve mcp-sieve
69
+ ```
70
+
71
+ > **Windows:** `MCP_ROUTER_CONFIG` is required — `uvx` installs the package into an isolated venv, so `__file__` points into the uv cache and the source-relative `config.yaml` lookup cannot find your file. See [Windows notes](#windows-notes) below.
72
+
73
+ ## Connect to Hermes
74
+
75
+ ```bash
76
+ hermes mcp add sieve --command uvx --args "--from" --args "/path/to/mcp-sieve" --args "mcp-sieve"
77
+ hermes mcp test sieve
78
+ # /reset in chat
79
+ ```
80
+
81
+ ## Config
82
+
83
+ `config.yaml` (see `config.example.yaml` for a full template):
84
+
85
+ ```yaml
86
+ downstream:
87
+ - name: time
88
+ command: uvx
89
+ args: ["mcp-server-time"]
90
+
91
+ - name: fetch
92
+ command: uvx
93
+ args: ["mcp-server-fetch"]
94
+
95
+ - name: git
96
+ command: uvx
97
+ args: ["mcp-server-git", "--repository", "/path/to/your/repo"]
98
+
99
+ # Windows: npx is a .cmd file — needs cmd /c
100
+ - name: context7
101
+ command: cmd
102
+ args: ["/c", "npx", "-y", "@upstash/context7-mcp@latest"]
103
+
104
+ - name: filesystem
105
+ command: cmd
106
+ args: ["/c", "npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"]
107
+
108
+ # Remote MCP over HTTP (streamable) or SSE — no local process.
109
+ # transport defaults to stdio; a bare url implies http.
110
+ - name: gitmcp
111
+ transport: http
112
+ url: "https://gitmcp.io/docs"
113
+
114
+ embeddings:
115
+ ollama_url: "http://127.0.0.1:11434/api/embeddings"
116
+ model: "nomic-embed-text"
117
+ top_n: 10
118
+ ```
119
+
120
+ Env variables:
121
+ - `MCP_ROUTER_CONFIG` — path to `config.yaml` (otherwise looks in CWD or next to source)
122
+ - `MCP_SIEVE_DOWNSTREAM_<N>_NAME` / `_COMMAND` / `_ARGS` / `_URL` / `_TRANSPORT` — define downstream servers without a file (Docker/k8s). `N` starts at 1, stops at the first gap. `_ARGS` is a JSON array or whitespace-split. A same-named entry overrides the yaml one.
123
+ - `MCP_SIEVE_OLLAMA_URL` / `MCP_SIEVE_EMBED_MODEL` / `MCP_SIEVE_TOP_N` — embeddings overrides
124
+
125
+ Crashed downstream servers (Ollama, npx) auto-reconnect with exponential backoff — no restart needed.
126
+
127
+ ## Windows notes
128
+
129
+ 1. **npx → `cmd /c npx`:** `npx` is a `.cmd` file, Python subprocess (MCP SDK) can't find it without a shell. `uvx` is a real binary, works directly.
130
+
131
+ 2. **uvx --from and dependencies:** `uvx --from <project>` installs the package into an isolated uv-cache venv. All imports must be in `pyproject.toml` `[project.dependencies]` — implicit deps from the dev env won't be picked up.
132
+
133
+ 3. **uv cache clean:** if the cache is locked (`os error 32`), kill MCP server processes first:
134
+ ```bash
135
+ powershell -Command "Get-Process | Where-Object { $_.ProcessName -match 'mcp|uv' } | Stop-Process -Force"
136
+ uv cache clean --force
137
+ ```
138
+
139
+ 4. **Debug connection failures:** `claude --debug` writes to `~/.claude/debug/<session>.txt`. Grep `Server stderr:` for real server tracebacks.
140
+
141
+ ## Stack
142
+
143
+ - **MCP Python SDK** (`mcp`) — stdio + HTTP/SSE transports, `notifications/tools/list_changed`
144
+ - **Ollama** — local embeddings (`nomic-embed-text`), free
145
+ - **numpy** — cosine similarity
146
+ - **httpx** — HTTP client for Ollama API
147
+
148
+ ## Fallback
149
+
150
+ If Ollama is unavailable — the sieve doesn't crash. `mcp_router_select` returns **all** downstream tools without ranking, with a warning in the response.
151
+
152
+ ## Performance
153
+
154
+ Tested with 9 downstream servers (74 tools):
155
+ - `mcp_router_select`: 83–166ms
156
+ - `mcp_router_call`: 15–774ms (longest: playwright browser navigation)
157
+ - Startup: ~16s (all 9 downstream connect + 74 embeddings built)
158
+
159
+ ## Status
160
+
161
+ Working end-to-end in Claude Code and Hermes. See `TASKS.md` for the roadmap and benchmark results.
@@ -0,0 +1,34 @@
1
+ # Roadmap — MCP Sieve
2
+
3
+ ## Completed
4
+
5
+ - **Core MCP server** — stdio transport, `initialize`/`tools/list`/`tools/call`, `listChanged` capability
6
+ - **Downstream discovery** — connects to all downstream MCP servers from `config.yaml`, pulls tools via `list_tools()`, one failing server doesn't crash the rest
7
+ - **Semantic search** — Ollama embeddings (`nomic-embed-text`), cosine similarity, top-N ranking. Fallback: if Ollama is down, returns all tools with a warning
8
+ - **Call routing** — `tools/call` proxied to the correct downstream. Unknown tools → clear error
9
+ - **`mcp_router_call` proxy** — for clients with frozen toolsets (prompt caching). `select` returns tools with `inputSchema`, `call` proxies execution. No `/reset` needed on new downstream tools
10
+ - **Cross-platform** — tested on Windows (npx via `cmd /c`, config discovery via `MCP_ROUTER_CONFIG` env). Works with Claude Code and Hermes
11
+ - **HTTP/SSE downstream transport** — `transport: http|sse` + `url` in config connects remote MCP (GitMCP, Cloudflare Remote MCP). stdio stays the default
12
+ - **Env-var config** — `MCP_SIEVE_DOWNSTREAM_<N>_*` and embeddings overrides, for Docker/k8s with no config file. Merges over yaml by name
13
+ - **Auto-reconnect** — per-downstream supervisor tasks with `send_ping` liveness + exponential backoff. A crashed downstream (Ollama, npx) self-heals instead of being lost until restart
14
+
15
+ ## Benchmark
16
+
17
+ Tested with **10 downstream servers (79 tools)** in Claude Code across 5 long tasks:
18
+
19
+ | Metric | Result |
20
+ |--------|--------|
21
+ | `mcp_router_select` latency | 83–166ms |
22
+ | `mcp_router_call` latency | 15–774ms |
23
+ | Startup (10 servers + 79 embeddings) | ~16–18s |
24
+ | Tools in LLM system prompt | 2 (always) |
25
+ | Router accuracy (relevant tool in top-10) | 100% across test tasks |
26
+ | Remote HTTP transport | verified (GitMCP via streamable_http) |
27
+
28
+ **Downstream servers tested:** time, fetch, git, arxiv, context7, filesystem, playwright, sequential-thinking, memory, gitmcp (HTTP)
29
+
30
+ **Observation:** Claude Code prefers native tools when they overlap (Bash vs git MCP, Write vs filesystem MCP). The router is most valuable for tools without native equivalents — arxiv, playwright, memory, context7.
31
+
32
+ ## Planned
33
+
34
+ - FAISS for >1000 tools (currently numpy cosine sim) — deferred; numpy is fine below ~1000 tools
@@ -0,0 +1,61 @@
1
+ # MCP Sieve configuration
2
+ # Copy this file to config.yaml and edit for your setup.
3
+ # Real config.yaml is gitignored — it contains your local paths.
4
+
5
+ # Downstream MCP servers that the router will aggregate.
6
+ # Each entry spawns a stdio MCP server process, unless it sets `url`/`transport` (see the remote example below).
7
+ downstream:
8
+ - name: time
9
+ command: uvx
10
+ args: ["mcp-server-time"]
11
+
12
+ - name: fetch
13
+ command: uvx
14
+ args: ["mcp-server-fetch"]
15
+
16
+ - name: git
17
+ command: uvx
18
+ args: ["mcp-server-git", "--repository", "/path/to/your/repo"]
19
+
20
+ - name: arxiv
21
+ command: uvx
22
+ args: ["arxiv-mcp-server"]
23
+
24
+ # Windows: npx is a .cmd file — Python subprocess can't find it without a shell.
25
+ # Use "cmd /c npx" instead of just "npx".
26
+ - name: context7
27
+ command: cmd
28
+ args: ["/c", "npx", "-y", "@upstash/context7-mcp@latest"]
29
+
30
+ - name: filesystem
31
+ command: cmd
32
+ args: ["/c", "npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/allowed/dir"]
33
+
34
+ - name: playwright
35
+ command: cmd
36
+ args: ["/c", "npx", "-y", "@playwright/mcp@latest"]
37
+
38
+ - name: sequential-thinking
39
+ command: cmd
40
+ args: ["/c", "npx", "-y", "@modelcontextprotocol/server-sequential-thinking"]
41
+
42
+ - name: memory
43
+ command: cmd
44
+ args: ["/c", "npx", "-y", "@modelcontextprotocol/server-memory"]
45
+
46
+ # Remote MCP over HTTP (streamable) or SSE — no local process spawned.
47
+ # `transport` defaults to stdio; a bare `url` implies http.
48
+ - name: gitmcp
49
+ transport: http # or: sse
50
+ url: "https://gitmcp.io/docs"
51
+
52
+ # Env-var config (Docker/k8s, no file): MCP_SIEVE_DOWNSTREAM_1_NAME,
53
+ # _COMMAND, _ARGS (JSON array or whitespace-split), _URL, _TRANSPORT.
54
+ # N starts at 1 and stops at the first gap. Same-named env entry overrides yaml.
55
+ # Also: MCP_SIEVE_OLLAMA_URL, MCP_SIEVE_EMBED_MODEL, MCP_SIEVE_TOP_N.
56
+
57
+ # Embeddings settings (Ollama)
58
+ embeddings:
59
+ ollama_url: "http://127.0.0.1:11434/api/embeddings"
60
+ model: "nomic-embed-text"
61
+ top_n: 10 # how many relevant tools to return per select call
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "mcp-sieve"
3
+ version = "0.1.0"
4
+ description = "MCP Sieve — semantic tool selection proxy for LLMs. Aggregates downstream MCP servers into 2 static tools (select + call) via Ollama embeddings."
5
+ readme = "README.md"
6
+ license = { file = "LICENSE" }
7
+ requires-python = ">=3.11"
8
+ authors = [{ name = "dimarch0x" }]
9
+ keywords = ["mcp", "mcp-server", "semantic-routing", "llm", "ollama", "tool-selection"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.11",
15
+ "Programming Language :: Python :: 3.12",
16
+ "Programming Language :: Python :: 3.13",
17
+ "Topic :: Software Development :: Libraries",
18
+ ]
19
+ dependencies = [
20
+ "mcp>=1.0.0",
21
+ "httpx>=0.27.0",
22
+ "numpy>=1.26.0",
23
+ "pyyaml>=6.0",
24
+ ]
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/dimarch0x/mcp-sieve"
28
+ Repository = "https://github.com/dimarch0x/mcp-sieve"
29
+ Issues = "https://github.com/dimarch0x/mcp-sieve/issues"
30
+
31
+ [project.scripts]
32
+ mcp-sieve = "mcp_router.server:main"
33
+
34
+ [build-system]
35
+ requires = ["hatchling"]
36
+ build-backend = "hatchling.build"
37
+
38
+ [tool.uv]
39
+ package = true
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/mcp_router"]
@@ -0,0 +1,19 @@
1
#!/usr/bin/env bash
# Smoke test for mcp-sieve over stdio.
# Sends, in order: initialize → notifications/initialized → tools/list →
# tools/call(mcp_router_select), one JSON-RPC message per line.

set -e
# Run from the repository root (this script lives one directory below it).
cd "$(dirname "$0")/.."

requests=(
  # JSON-RPC initialize
  '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"test","version":"0.1"}}}'
  # initialized notification
  '{"jsonrpc":"2.0","method":"notifications/initialized"}'
  # tools/list
  '{"jsonrpc":"2.0","id":2,"method":"tools/list"}'
  # tools/call mcp_router_select
  '{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"mcp_router_select","arguments":{"task":"get current time in UTC"}}}'
)

# printf reuses its format for every argument, so each request ends with a newline.
# Server stderr (logging) is discarded; responses arrive on stdout.
printf '%s\n' "${requests[@]}" |
  timeout 10 python -m mcp_router.server 2>/dev/null
@@ -0,0 +1 @@
1
+ """MCP Sieve package."""
@@ -0,0 +1,482 @@
1
+ """MCP Sieve — semantic tool selection proxy.
2
+
3
+ Path 1 (notifications/tools/list_changed):
4
+ 1. tools/list returns 1 tool: mcp_router_select
5
+ 2. LLM calls mcp_router_select(task="...") → sieve finds top-N tools
6
+ 3. Sieve updates current_tools + sends notifications/tools/list_changed
7
+ 4. tools/list now returns the relevant tools
8
+ 5. LLM calls a tool directly → sieve proxies to downstream
9
+
10
+ Path 2 (mcp_router_call):
11
+ For clients with a frozen toolset (e.g. Hermes with prompt caching):
12
+ 1. LLM calls mcp_router_select(task="...") → gets a list of suitable tools with their inputSchema.
13
+ 2. LLM calls mcp_router_call(tool_name="...", arguments={...}) to execute the chosen tool.
14
+
15
+ ponytail: one file, everything in it.
16
+ """
17
+ import asyncio
18
+ import contextlib
19
+ import json
20
+ import logging
21
+ import os
22
+ from pathlib import Path
23
+ from typing import NamedTuple
24
+
25
+ import httpx
26
+ import numpy as np
27
+ import yaml
28
+ from mcp import ClientSession, StdioServerParameters
29
+ from mcp.client.sse import sse_client
30
+ from mcp.client.stdio import stdio_client
31
+ from mcp.client.streamable_http import streamablehttp_client
32
+ from mcp.server import NotificationOptions, Server
33
+ from mcp.server.stdio import stdio_server
34
+ from mcp.types import CallToolResult, EmbeddedResource, ImageContent, TextContent, Tool
35
+
36
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
37
+ log = logging.getLogger("mcp-sieve")
38
+
39
+ # --- Config ------------------------------------------------------------------
40
+
41
+ # ponytail: when launched via uvx --from <project>, the package lives in uv-cache,
42
+ # __file__ points there. Look for config.yaml: (1) env MCP_ROUTER_CONFIG,
43
+ # (2) next to CWD, (3) fallback to the old path relative to source.
44
+ def _find_config() -> Path:
45
+ env = os.environ.get("MCP_ROUTER_CONFIG")
46
+ if env:
47
+ p = Path(env).expanduser()
48
+ if p.exists():
49
+ return p
50
+ cwd_cfg = Path.cwd() / "config.yaml"
51
+ if cwd_cfg.exists():
52
+ return cwd_cfg
53
+ src_cfg = Path(__file__).resolve().parent.parent.parent / "config.yaml"
54
+ return src_cfg
55
+
56
+ CONFIG_PATH = _find_config()
57
+
58
+
59
+ def _env_downstream() -> list[dict]:
60
+ """Read MCP_SIEVE_DOWNSTREAM_<N>_* env vars into downstream entries.
61
+
62
+ ponytail: for Docker/k8s where mounting a file is a pain. N starts at 1,
63
+ stops at the first gap. _ARGS is a JSON array (for quoting/backslashes) or
64
+ plain whitespace-split. _URL/_TRANSPORT enable HTTP/SSE downstream.
65
+ """
66
+ out: list[dict] = []
67
+ n = 1
68
+ while True:
69
+ name = os.environ.get(f"MCP_SIEVE_DOWNSTREAM_{n}_NAME")
70
+ if not name:
71
+ break
72
+ entry: dict = {"name": name}
73
+ for key, field in (("COMMAND", "command"), ("URL", "url"), ("TRANSPORT", "transport")):
74
+ val = os.environ.get(f"MCP_SIEVE_DOWNSTREAM_{n}_{key}")
75
+ if val:
76
+ entry[field] = val
77
+ raw_args = os.environ.get(f"MCP_SIEVE_DOWNSTREAM_{n}_ARGS")
78
+ if raw_args:
79
+ entry["args"] = json.loads(raw_args) if raw_args.lstrip().startswith("[") else raw_args.split()
80
+ out.append(entry)
81
+ n += 1
82
+ return out
83
+
84
+
85
def _apply_env(cfg: dict) -> dict:
    """Merge env-var overrides into a loaded config dict (mutates and returns it).

    Downstream entries from MCP_SIEVE_DOWNSTREAM_<N>_* are merged by name: an
    env entry replaces a same-named yaml entry, otherwise it is appended.
    MCP_SIEVE_OLLAMA_URL / MCP_SIEVE_EMBED_MODEL / MCP_SIEVE_TOP_N override the
    matching ``embeddings`` fields.

    After the call ``cfg["downstream"]`` is always a list and
    ``cfg["embeddings"]`` is always a dict, even when the YAML file declares
    either key with no value (which loads as None).

    Raises:
        ValueError: MCP_SIEVE_TOP_N is set but is not an integer.
    """
    # `or []` / `or {}`: a key present with a null value must behave like a missing key.
    downstream = cfg.get("downstream") or []
    env_ds = _env_downstream()
    if env_ds:
        # name-keyed merge: env entry replaces a same-named yaml entry, else appends
        by_name = {d["name"]: d for d in downstream}
        for d in env_ds:
            by_name[d["name"]] = d
        downstream = list(by_name.values())
    cfg["downstream"] = downstream

    emb = cfg.get("embeddings") or {}
    cfg["embeddings"] = emb
    for key, field in (("OLLAMA_URL", "ollama_url"), ("EMBED_MODEL", "model")):
        val = os.environ.get(f"MCP_SIEVE_{key}")
        if val:
            emb[field] = val

    top_n = os.environ.get("MCP_SIEVE_TOP_N")
    if top_n:
        try:
            emb["top_n"] = int(top_n)
        except ValueError as err:
            raise ValueError(f"MCP_SIEVE_TOP_N must be an integer, got {top_n!r}") from err
    return cfg
104
+
105
+
106
def load_config() -> dict:
    """Build the effective config: config.yaml (when present) plus env overrides.

    When CONFIG_PATH does not exist, starts from an empty downstream list and
    empty embeddings settings, so the env vars alone define the setup.
    """
    if CONFIG_PATH.exists():
        log.info("loading config: %s", CONFIG_PATH)
        raw = CONFIG_PATH.read_text(encoding="utf-8")
        # An empty file parses to None; treat it as an empty mapping.
        parsed = yaml.safe_load(raw) or {}
        return _apply_env(parsed)

    log.info("config.yaml not found (%s), relying on env vars", CONFIG_PATH)
    return _apply_env({"downstream": [], "embeddings": {}})
113
+
114
+
115
# Effective settings, resolved once at import time from config.yaml + env vars.
CFG = load_config()
# `or {}` / `or []` rather than a .get() default: a YAML key that is present
# but has no value loads as None, and a .get() default only covers a missing key.
EMBED_CFG = CFG.get("embeddings") or {}
OLLAMA_URL = EMBED_CFG.get("ollama_url", "http://127.0.0.1:11434/api/embeddings")
EMBED_MODEL = EMBED_CFG.get("model", "nomic-embed-text")
TOP_N = EMBED_CFG.get("top_n", 10)
DOWNSTREAM = CFG.get("downstream") or []
121
+
122
+ # --- State -------------------------------------------------------------------
123
+
124
+
125
class RegisteredTool(NamedTuple):
    """One downstream tool as stored in tool_registry."""

    # Name of the downstream server that owns the tool (key into downstream_sessions).
    downstream: str
    # Tool name as the downstream server itself reports it (used when proxying the call).
    orig_name: str
    # Tool definition re-published under the namespaced "<downstream>_<orig_name>" name.
    tool: Tool
129
+
130
+
131
+ # ns_name → RegisteredTool
132
+ tool_registry: dict[str, RegisteredTool] = {}
133
+ # ns_name → embedding vector (np.ndarray)
134
+ embeddings_cache: dict[str, np.ndarray] = {}
135
+ # downstream_name → ClientSession
136
+ downstream_sessions: dict[str, ClientSession] = {}
137
+ # current tool list exposed via tools/list
138
+ current_tools: list[Tool] = []
139
+
140
+ # --- Embedding ---------------------------------------------------------------
141
+
142
+ _embed_client: httpx.AsyncClient | None = None
143
+
144
+
145
async def embed(text: str) -> np.ndarray:
    """Return the Ollama embedding of *text* as a float32 vector.

    POSTs ``{"model": EMBED_MODEL, "prompt": text}`` to OLLAMA_URL and reads
    the ``embedding`` field of the JSON reply. The shared HTTP client is
    created on first use (30 s timeout) and reused afterwards. A non-success
    HTTP status is raised via ``raise_for_status()``.

    ponytail: no DB — callers keep the vectors in memory.
    """
    global _embed_client
    client = _embed_client
    if client is None:
        client = _embed_client = httpx.AsyncClient(timeout=30)

    payload = {"model": EMBED_MODEL, "prompt": text}
    response = await client.post(OLLAMA_URL, json=payload)
    response.raise_for_status()
    vector = response.json()["embedding"]
    return np.array(vector, dtype=np.float32)
153
+
154
+
155
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of *a* and *b* as a Python float.

    A small epsilon is added to the denominator, so a zero vector yields 0.0
    instead of a division by zero.

    ponytail: numpy — fine up to 1000 tools; beyond that use FAISS.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b) + 1e-8
    return float(np.dot(a, b) / denominator)
158
+
159
+
160
+ # --- Helpers -----------------------------------------------------------------
161
+
162
+
163
def _text(text: str) -> list[TextContent]:
    """Wrap *text* in a one-element list of MCP text content."""
    item = TextContent(type="text", text=text)
    return [item]
165
+
166
+
167
+ def _tool_dict(t: Tool) -> dict:
168
+ """Single serialization point for Tool → dict in select responses."""
169
+ return {"name": t.name, "description": t.description, "inputSchema": t.inputSchema}
170
+
171
+
172
def _resolve(name: str) -> str | None:
    """Map a requested tool name to a key of tool_registry, or None.

    Tried in order:
      1. exact match;
      2. the name with a leading ``mcp_router_`` removed;
      3. the single registry key ending in ``_<name>``.

    ponytail: suffix match with uniqueness check — never silently picks the first.
    'time' would match '..._get_time', but if >1 match → None (ambiguous).
    """
    if name in tool_registry:
        return name

    bare = name.removeprefix("mcp_router_")
    if bare != name and bare in tool_registry:
        return bare

    suffix = f"_{name}"
    candidates = [registered for registered in tool_registry if registered.endswith(suffix)]
    if len(candidates) != 1:
        return None
    return candidates[0]
186
+
187
+
188
async def _invoke(entry: RegisteredTool, args: dict) -> list[TextContent]:
    """Call *entry*'s tool on its downstream session. Single point for downstream calls.

    Returns the content of the downstream result. Failures never propagate:
    a missing session yields a plain text message, and any exception during
    the call is logged and returned as a JSON ``{"error": ...}`` text item.
    """
    session = downstream_sessions.get(entry.downstream)
    if session is None:
        return _text(f"downstream session {entry.downstream} not available")

    try:
        result = await session.call_tool(entry.orig_name, args)
        content = result.content
    except Exception as exc:
        log.exception("downstream call failed: %s/%s", entry.downstream, entry.orig_name)
        payload = {"error": f"downstream call failed: {exc}"}
        return _text(json.dumps(payload, ensure_ascii=False))
    else:
        return content
199
+
200
+
201
+ # --- Downstream discovery ----------------------------------------------------
202
+
203
async def _open_transport(ds: dict, exit_stack: contextlib.AsyncExitStack):
    """Open the transport configured for a downstream and return (read, write).

    The transport is ``ds["transport"]`` when set; otherwise a bare ``url``
    implies ``http`` and anything else defaults to ``stdio``.

    Args:
        ds: Downstream config. Needs ``name``; ``command`` (plus optional
            ``args``) for stdio; ``url`` for http / streamable-http / sse.
        exit_stack: Stack that takes ownership of the transport context so it
            is torn down together with the caller's other resources.

    Returns:
        The first two stream objects yielded by the transport client.

    Raises:
        ValueError: unknown transport, or the key that transport requires
            (``command`` / ``url``) is missing or empty.
    """
    name = ds["name"]
    transport = ds.get("transport") or ("http" if ds.get("url") else "stdio")

    if transport == "stdio":
        command = ds.get("command")
        if not command:
            # A bare KeyError('command') is unreadable in the supervisor's log line.
            raise ValueError(f"downstream {name}: stdio transport requires 'command'")
        # `args: null` in config means "no arguments", not None.
        params = StdioServerParameters(command=command, args=ds.get("args") or [], env=None)
        log.info("connecting downstream %s (stdio): %s %s", name, params.command, params.args)
        client = stdio_client(params)
    elif transport in ("http", "streamable-http", "sse"):
        url = ds.get("url")
        if not url:
            raise ValueError(f"downstream {name}: {transport} transport requires 'url'")
        if transport == "sse":
            log.info("connecting downstream %s (sse): %s", name, url)
            client = sse_client(url)
        else:
            log.info("connecting downstream %s (http): %s", name, url)
            client = streamablehttp_client(url)
    else:
        raise ValueError(f"unknown transport '{transport}' for downstream {name}")

    streams = await exit_stack.enter_async_context(client)
    return streams[0], streams[1]  # streamablehttp yields a 3rd session-id getter, ignore it
223
+
224
+
225
async def _connect(ds: dict, exit_stack: contextlib.AsyncExitStack) -> ClientSession:
    """Bring up an initialized client session for one downstream.

    Opens the transport, wraps its streams in a ``ClientSession`` and runs
    ``initialize()``. Both the transport and the session are entered on
    ``exit_stack``, so closing the stack tears down both.
    """
    reader, writer = await _open_transport(ds, exit_stack)
    client = ClientSession(reader, writer)
    session = await exit_stack.enter_async_context(client)
    await session.initialize()
    return session
231
+
232
+
233
async def _register_tools(name: str, session: ClientSession) -> None:
    """Register one downstream's tools and make sure each has an embedding.

    Safe to call on every (re)connect:

    * Tool listing happens once per downstream. A server's tool set is treated
      as stable, so if any registry entry already belongs to ``name`` the
      listing is skipped.
    * The embedding pass always runs for entries of ``name`` that are still
      missing from ``embeddings_cache``. This lets a reconnect recover tools
      whose embeddings failed on an earlier attempt; without it they would
      stay invisible to semantic search for the life of the process.

    Failures are logged and swallowed: an unavailable listing leaves the
    registry untouched, an unavailable embedder leaves search degraded.
    """
    already_registered = any(e.downstream == name for e in tool_registry.values())
    if not already_registered:
        try:
            result = await session.list_tools()
        except Exception as e:
            log.error("failed to list tools from downstream %s: %s", name, e)
            return
        for tool in result.tools:
            # Namespace with the downstream name so equal tool names can't collide.
            ns_name = f"{name}_{tool.name}"
            ns_tool = Tool(name=ns_name, description=tool.description, inputSchema=tool.inputSchema)
            tool_registry[ns_name] = RegisteredTool(name, tool.name, ns_tool)

    # Snapshot the work list up front: the registry may change while we await.
    missing = [
        (ns_name, entry)
        for ns_name, entry in tool_registry.items()
        if entry.downstream == name and ns_name not in embeddings_cache
    ]
    if not missing:
        return
    try:
        for ns_name, entry in missing:
            embeddings_cache[ns_name] = await embed(f"{entry.tool.name}: {entry.tool.description or ''}")
        log.info("embeddings ready for %s (%d total)", name, len(embeddings_cache))
    except Exception as e:
        log.warning("embeddings failed for %s (%s), semantic search degraded", name, e)
256
+
257
+
258
# ponytail: fixed backoff cap; make configurable if flapping servers appear.
# All four values are in seconds.
RECONNECT_BASE = 1.0    # first reconnect delay; doubled after each failure
RECONNECT_CAP = 30.0    # upper bound for the reconnect delay
HEALTH_INTERVAL = 15.0  # pause between liveness pings of a connected downstream
STARTUP_TIMEOUT = 60.0  # how long startup waits for first connect attempts
260
+
261
+
262
async def _supervise(ds: dict, ready: asyncio.Event) -> None:
    """Keep a single downstream connected for the lifetime of the task.

    Each pass opens a fresh exit stack, connects, publishes the session in
    ``downstream_sessions``, registers tools, and then pings the session every
    ``HEALTH_INTERVAL`` seconds. Any exception (failed connect or failed ping)
    closes the stack, unpublishes the session and schedules a retry after a
    delay that doubles up to ``RECONNECT_CAP``; a successful connect resets
    the delay to ``RECONNECT_BASE``.

    The exit stack is created and closed inside this task so that transport
    setup and teardown always happen in the same task.

    ``ready`` is set after the first attempt either way, so startup never
    waits on a downstream that cannot connect. Cancellation propagates.
    """
    name = ds["name"]
    delay = RECONNECT_BASE
    while True:
        try:
            async with contextlib.AsyncExitStack() as stack:
                session = await _connect(ds, stack)
                downstream_sessions[name] = session
                log.info("downstream %s connected", name)
                delay = RECONNECT_BASE
                await _register_tools(name, session)
                ready.set()
                # Liveness loop: a raising ping means the transport is gone.
                while True:
                    await asyncio.sleep(HEALTH_INTERVAL)
                    await session.send_ping()
        except asyncio.CancelledError:
            raise
        except Exception as e:
            ready.set()  # don't hold up startup on a server that won't connect
            downstream_sessions.pop(name, None)
            log.warning("downstream %s down (%s); reconnecting in %.0fs", name, e, delay)
            await asyncio.sleep(delay)
            delay = min(delay * 2, RECONNECT_CAP)
291
+
292
+
293
+ # --- MCP Server --------------------------------------------------------------
294
+
295
# The router's own MCP server instance. The list_tools/call_tool handlers below
# are attached to it via decorators, and main_async() runs it over stdio.
server: Server = Server("mcp-sieve")
296
+
297
# Meta-tool: the client describes its task and gets back the matching
# downstream tools. Always exposed (see _set_current_tools).
ROUTER_SELECT_TOOL = Tool(
    name="mcp_router_select",
    description=(
        "REQUIRED first step before any task involving external tools. "
        "Pass your current task description; "
        "returns the relevant tools available for that task. "
        "Always call this first."
    ),
    inputSchema=dict(
        type="object",
        properties={
            "task": dict(
                type="string",
                description="Natural language description of what you want to do.",
            ),
        },
        required=["task"],
    ),
)
315
+
316
# Meta-tool: executes a downstream tool by its registry name.
# Always exposed (see _set_current_tools).
ROUTER_CALL_TOOL = Tool(
    name="mcp_router_call",
    description=(
        "Execute a downstream tool by name. "
        "Use after mcp_router_select to find the right tool, "
        "then call this with the tool_name and arguments from the select response."
    ),
    inputSchema=dict(
        type="object",
        properties={
            "tool_name": dict(
                type="string",
                description="Tool name from mcp_router_select response (e.g. 'time_get_current_time').",
            ),
            "arguments": dict(
                type="object",
                description="Arguments object matching the tool's inputSchema.",
            ),
        },
        required=["tool_name", "arguments"],
    ),
)
338
+
339
+
340
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Report the tools currently exposed to the client.

    The list is whatever ``_set_current_tools`` last stored: the two router
    meta-tools followed by the most recent selection.
    """
    exposed = current_tools
    return exposed
344
+
345
+
346
@server.call_tool()
async def call_tool(name: str, arguments: dict | None) -> list[TextContent] | list[ImageContent] | list[EmbeddedResource]:
    """Route an incoming tool call.

    Dispatch order:
      1. ``mcp_router_select`` — semantic tool selection for ``arguments["task"]``.
      2. ``mcp_router_call`` — run the downstream tool named by
         ``arguments["tool_name"]`` with ``arguments["arguments"]``.
      3. anything else — treated as a direct call to a registered downstream tool.

    Some clients (e.g. Hermes) prepend ``mcp_router_`` to every tool name; the
    prefix is removed when the remainder is a router tool or a registry key.

    For ``mcp_router_call`` the nested ``arguments`` may be an object, a JSON
    string encoding an object, or absent/null (treated as ``{}``). Anything
    else is rejected rather than forwarded to the downstream.

    Returns:
        The tool's content, or a text item holding a JSON ``{"error": ...}``
        object for unknown tools and malformed input.
    """
    args = arguments or {}

    # Insurance: strip a client-added prefix, but only when that yields a known name.
    prefix = "mcp_router_"
    if name.startswith(prefix):
        stripped = name[len(prefix):]
        if stripped in ("mcp_router_select", "mcp_router_call") or stripped in tool_registry:
            name = stripped

    if name == "mcp_router_select":
        return await _handle_select(args.get("task", ""))

    if name == "mcp_router_call":
        tool_name = args.get("tool_name", "")
        tool_args = args.get("arguments")
        if isinstance(tool_args, str):
            # Some clients send the arguments object serialized as a string.
            try:
                tool_args = json.loads(tool_args)
            except (ValueError, RecursionError) as e:
                return _text(json.dumps({"error": f"failed to parse arguments JSON: {e}"}, ensure_ascii=False))
        if tool_args is None:
            tool_args = {}
        if not isinstance(tool_args, dict):
            # e.g. "[1, 2]" decodes fine but is not a valid arguments object.
            return _text(json.dumps({"error": "arguments must be a JSON object"}, ensure_ascii=False))

        if not tool_name:
            return _text(json.dumps({"error": "tool_name is required"}, ensure_ascii=False))

        resolved = _resolve(tool_name)
        if resolved is None:
            return _text(json.dumps({"error": f"unknown tool: {tool_name}"}, ensure_ascii=False))
        return await _invoke(tool_registry[resolved], tool_args)

    # Direct downstream call by registry name
    resolved = _resolve(name)
    if resolved is not None:
        return await _invoke(tool_registry[resolved], args)

    return _text(json.dumps({"error": f"unknown tool: {name}"}, ensure_ascii=False))
387
+
388
+
389
async def _handle_select(task: str) -> list[TextContent]:
    """Pick the tools most relevant to ``task`` and expose them to the client.

    Behavior by state:

    * empty ``task`` — JSON error;
    * empty registry — informational payload listing the currently exposed tools;
    * no cached embeddings, or embedding the query fails — every registered
      tool is exposed and returned with a warning, so a missing or flaky
      embedder degrades the result instead of failing the call;
    * otherwise — tools are ranked by cosine similarity between the task and
      each cached tool embedding, the best ``TOP_N`` become the exposed set,
      and a tools/list_changed notification is sent on a best-effort basis.

    Returns:
        A text item containing the JSON payload for the case that applied.
    """
    if not task:
        return _text('{"error": "task is required"}')

    if not tool_registry:
        return _text(json.dumps({
            "info": "no downstream tools registered yet",
            "available_tools": [t.name for t in current_tools],
        }, ensure_ascii=False))

    hint = "Use mcp_router_call(tool_name=<name>, arguments=<args>) to execute any of these tools."

    semantic = bool(embeddings_cache)
    q_vec = None
    if semantic:
        try:
            q_vec = await embed(task)
        except Exception as e:
            # Embedder outage at query time: fall back exactly like "no embeddings".
            log.warning("query embedding failed (%s); returning all tools", e)
            semantic = False

    if not semantic:
        # no embeddings — return all tools, don't break the flow
        all_ns_tools = [entry.tool for entry in tool_registry.values()]
        _set_current_tools(all_ns_tools)
        return _text(json.dumps({
            "warning": "embeddings unavailable; returning all tools",
            "tools": [_tool_dict(t) for t in all_ns_tools],
            "hint": hint,
        }, ensure_ascii=False))

    scored: list[tuple[float, Tool]] = [
        (cosine_sim(q_vec, vec), tool_registry[ns_name].tool)
        for ns_name, vec in embeddings_cache.items()
    ]
    scored.sort(key=lambda x: x[0], reverse=True)

    top = [t for _, t in scored[:TOP_N]]
    _set_current_tools(top)

    # notify the client (best-effort, only works inside a request context)
    try:
        ctx = server.request_context
        await ctx.session.send_tool_list_changed()
        log.info("sent tools/list_changed, now exposing %d tools", len(top))
    except (LookupError, AttributeError) as e:
        log.warning("cannot send list_changed outside active request: %s", e)
    except Exception as e:
        log.warning("could not send list_changed: %s", e)

    payload = {
        "selected_tools": [_tool_dict(t) for t in top],
        "hint": hint,
    }
    return _text(json.dumps(payload, ensure_ascii=False))
435
+
436
+
437
def _set_current_tools(tools: list[Tool]) -> None:
    """Replace the exposed tool list with the router meta-tools plus ``tools``.

    ``mcp_router_select`` and ``mcp_router_call`` always come first; entries in
    ``tools`` carrying either of those names are dropped so they never appear
    twice.
    """
    global current_tools
    router_names = ("mcp_router_select", "mcp_router_call")
    exposed = [ROUTER_SELECT_TOOL, ROUTER_CALL_TOOL]
    exposed.extend(tool for tool in tools if tool.name not in router_names)
    current_tools = exposed
442
+
443
+
444
+ # --- Entrypoint --------------------------------------------------------------
445
+
446
async def main_async() -> None:
    """Start the downstream supervisors, then serve MCP over stdio until done.

    One supervisor task is spawned per entry in ``DOWNSTREAM``. Startup waits,
    for at most ``STARTUP_TIMEOUT`` seconds in total, until every supervisor
    has finished its first connect attempt, so the first select is not served
    from an empty registry. When the stdio server stops, all supervisors are
    cancelled and awaited.
    """
    _set_current_tools([])

    ready_events = [asyncio.Event() for _ in DOWNSTREAM]
    supervisors = []
    for ds, event in zip(DOWNSTREAM, ready_events):
        task = asyncio.create_task(_supervise(ds, event), name=f"supervise-{ds['name']}")
        supervisors.append(task)

    # Bounded wait: one slow server must not stall startup.
    if ready_events:
        first_attempts = asyncio.gather(*(event.wait() for event in ready_events))
        try:
            await asyncio.wait_for(first_attempts, timeout=STARTUP_TIMEOUT)
        except asyncio.TimeoutError:
            pass

    log.info("starting mcp-sieve on stdio, %d downstream connected, %d tools registered",
             len(downstream_sessions), len(tool_registry))
    try:
        async with stdio_server() as (read, write):
            init_options = server.create_initialization_options(
                NotificationOptions(tools_changed=True)
            )
            await server.run(read, write, init_options)
    finally:
        for task in supervisors:
            task.cancel()
        await asyncio.gather(*supervisors, return_exceptions=True)
475
+
476
+
477
def main() -> None:
    """Synchronous entry point: run ``main_async`` to completion on a new event loop."""
    coro = main_async()
    asyncio.run(coro)
479
+
480
+
481
# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,107 @@
1
+ """ponytail self-checks for env config (#2) and reconnect supervisor (#3).
2
+
3
+ Run: python test_transport_env_reconnect.py
4
+ No framework — plain asserts. Fails loudly if the logic breaks.
5
+ """
6
+ import asyncio
7
+ import os
8
+ import sys
9
+
10
+ sys.path.insert(0, "src")
11
+ import mcp_router.server as s
12
+
13
+
14
def test_env_downstream():
    """_env_downstream reads numbered MCP_SIEVE_DOWNSTREAM_<n>_* variables.

    Checks that plain ARGS are whitespace-split, JSON ARGS keep quoted
    elements intact, and that scanning stops at the first missing index.

    The environment is scrubbed of inherited MCP_SIEVE_* variables first: a
    stray MCP_SIEVE_DOWNSTREAM_3_NAME from the developer's shell would close
    the gap this test depends on and make the outcome machine-dependent.
    """
    env = {
        "MCP_SIEVE_DOWNSTREAM_1_NAME": "time",
        "MCP_SIEVE_DOWNSTREAM_1_COMMAND": "uvx",
        "MCP_SIEVE_DOWNSTREAM_1_ARGS": "mcp-server-time --local",  # whitespace split
        "MCP_SIEVE_DOWNSTREAM_2_NAME": "remote",
        "MCP_SIEVE_DOWNSTREAM_2_URL": "https://example.com/mcp",
        "MCP_SIEVE_DOWNSTREAM_2_ARGS": '["--flag", "with space"]',  # JSON keeps the quoted arg
        # gap at 3 stops the scan; a stray 4 must be ignored
        "MCP_SIEVE_DOWNSTREAM_4_NAME": "ignored",
    }
    old = dict(os.environ)
    for key in [k for k in os.environ if k.startswith("MCP_SIEVE_")]:
        del os.environ[key]
    os.environ.update(env)
    try:
        ds = s._env_downstream()
    finally:
        # Restore the caller's environment exactly, whatever happened above.
        os.environ.clear()
        os.environ.update(old)

    assert [d["name"] for d in ds] == ["time", "remote"], ds
    assert ds[0]["args"] == ["mcp-server-time", "--local"], ds[0]
    assert ds[1]["url"] == "https://example.com/mcp"
    assert ds[1]["args"] == ["--flag", "with space"], ds[1]  # JSON preserved the space
    print("ok: _env_downstream")
38
+
39
+
40
def test_apply_env_merge():
    """_apply_env overlays environment settings on a YAML-style config dict.

    An env downstream with an existing name replaces that entry, a new name is
    appended, untouched entries survive, and MCP_SIEVE_TOP_N lands in
    ``embeddings.top_n`` as an int.

    Inherited MCP_SIEVE_* variables are removed before the test values are
    applied so settings from the developer's shell cannot leak into the merge.
    """
    cfg = {"downstream": [{"name": "time", "command": "old"}, {"name": "git", "command": "git"}]}
    old = dict(os.environ)
    for key in [k for k in os.environ if k.startswith("MCP_SIEVE_")]:
        del os.environ[key]
    os.environ.update({
        "MCP_SIEVE_DOWNSTREAM_1_NAME": "time",  # same name → replaces
        "MCP_SIEVE_DOWNSTREAM_1_COMMAND": "new",
        "MCP_SIEVE_DOWNSTREAM_2_NAME": "arxiv",  # new name → appends
        "MCP_SIEVE_DOWNSTREAM_2_COMMAND": "uvx",
        "MCP_SIEVE_TOP_N": "5",
    })
    try:
        out = s._apply_env(cfg)
    finally:
        # Restore the caller's environment exactly, whatever happened above.
        os.environ.clear()
        os.environ.update(old)

    by_name = {d["name"]: d for d in out["downstream"]}
    assert by_name["time"]["command"] == "new", by_name["time"]
    assert by_name["git"]["command"] == "git"  # untouched yaml entry survives
    assert by_name["arxiv"]["command"] == "uvx"  # env-only entry added
    assert out["embeddings"]["top_n"] == 5
    print("ok: _apply_env merge")
62
+
63
+
64
def test_reconnect_supervisor():
    """_supervise must survive a failed first connect and re-heal after the session dies.

    The module's connect/register hooks and timing constants are swapped for
    fast fakes while the supervisor runs, then restored, so this check leaves
    the imported module as it found it.
    """
    attempts = {"n": 0}

    class FakeSession:
        def __init__(self):
            self.pinged = False

        async def send_ping(self):
            if self.pinged:  # dies on the 2nd ping
                raise ConnectionError("transport gone")
            self.pinged = True

    async def fake_connect(ds, stack):
        attempts["n"] += 1
        if attempts["n"] == 1:
            raise ConnectionError("first attempt fails")
        return FakeSession()

    async def noop_register(name, session):
        return None

    async def run():
        ready = asyncio.Event()
        task = asyncio.create_task(s._supervise({"name": "x"}, ready))
        await asyncio.sleep(0.3)
        task.cancel()
        await asyncio.gather(task, return_exceptions=True)
        assert ready.is_set()  # startup unblocked despite first failure
        assert attempts["n"] >= 3, attempts  # failed once, connected+died, reconnected
        assert "x" in s.downstream_sessions  # healed: a live session is registered

    patched = ("_connect", "_register_tools", "RECONNECT_BASE", "RECONNECT_CAP", "HEALTH_INTERVAL")
    saved = {attr: getattr(s, attr) for attr in patched}
    s._connect = fake_connect
    s._register_tools = noop_register
    s.RECONNECT_BASE = s.RECONNECT_CAP = s.HEALTH_INTERVAL = 0.01
    try:
        asyncio.run(run())
    finally:
        # Undo the monkeypatches and drop the fake session this test registered.
        for attr, value in saved.items():
            setattr(s, attr, value)
        s.downstream_sessions.pop("x", None)
    print("ok: _supervise reconnect")
101
+
102
+
103
# Script mode: run every self-check in order; the first failing assert aborts.
if __name__ == "__main__":
    for check in (test_env_downstream, test_apply_env_merge, test_reconnect_supervisor):
        check()
    print("all passed")