mcp-sieve 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_sieve-0.1.0/.gitignore +13 -0
- mcp_sieve-0.1.0/CLAUDE.md +5 -0
- mcp_sieve-0.1.0/LICENSE +21 -0
- mcp_sieve-0.1.0/PKG-INFO +206 -0
- mcp_sieve-0.1.0/README.md +161 -0
- mcp_sieve-0.1.0/TASKS.md +34 -0
- mcp_sieve-0.1.0/config.example.yaml +61 -0
- mcp_sieve-0.1.0/pyproject.toml +42 -0
- mcp_sieve-0.1.0/scripts/smoke_test.sh +19 -0
- mcp_sieve-0.1.0/src/mcp_router/__init__.py +1 -0
- mcp_sieve-0.1.0/src/mcp_router/server.py +482 -0
- mcp_sieve-0.1.0/test_transport_env_reconnect.py +107 -0
mcp_sieve-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dimarch0x
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mcp_sieve-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcp-sieve
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP Sieve — semantic tool selection proxy for LLMs. Aggregates downstream MCP servers into 2 static tools (select + call) via Ollama embeddings.
|
|
5
|
+
Project-URL: Homepage, https://github.com/dimarch0x/mcp-sieve
|
|
6
|
+
Project-URL: Repository, https://github.com/dimarch0x/mcp-sieve
|
|
7
|
+
Project-URL: Issues, https://github.com/dimarch0x/mcp-sieve/issues
|
|
8
|
+
Author: dimarch0x
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 dimarch0x
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: llm,mcp,mcp-server,ollama,semantic-routing,tool-selection
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
39
|
+
Requires-Python: >=3.11
|
|
40
|
+
Requires-Dist: httpx>=0.27.0
|
|
41
|
+
Requires-Dist: mcp>=1.0.0
|
|
42
|
+
Requires-Dist: numpy>=1.26.0
|
|
43
|
+
Requires-Dist: pyyaml>=6.0
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# MCP Sieve
|
|
47
|
+
|
|
48
|
+
A semantic proxy for MCP servers. Solves **tool selection degradation** — when an LLM has too many tools, it picks the wrong ones.
|
|
49
|
+
|
|
50
|
+
The sieve sits between the client (Claude Code, Hermes, any MCP client) and downstream MCP servers. The client sees **2 tools** instead of dozens: `mcp_router_select` + `mcp_router_call`. The first finds relevant tools via embeddings, the second proxies the call.
|
|
51
|
+
|
|
52
|
+
## How it works
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Client (Claude Code / Hermes)
|
|
56
|
+
↓ sees only 2 tools
|
|
57
|
+
mcp_router_select(task="...") → embeddings → top-N relevant tools
|
|
58
|
+
mcp_router_call(tool_name, arguments) → proxies to downstream
|
|
59
|
+
↓
|
|
60
|
+
downstream MCP servers (time, fetch, git, arxiv, playwright, ...)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Two call paths:**
|
|
64
|
+
|
|
65
|
+
1. **Path 1 (notifications/tools/list_changed):** `select` finds tools → sieve updates `tools/list` → client calls tools directly. Works with clients that support dynamic toolset updates.
|
|
66
|
+
|
|
67
|
+
2. **Path 2 (mcp_router_call proxy):** for clients with a **frozen toolset** (Hermes, Claude Code with prompt caching). `select` returns tools with `inputSchema` → `call` proxies execution. No `/reset` needed when new downstream tools are discovered.
|
|
68
|
+
|
|
69
|
+
## Install
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
git clone https://github.com/dimarch0x/mcp-sieve ~/Projects/mcp-sieve
|
|
73
|
+
cd ~/Projects/mcp-sieve
|
|
74
|
+
uv pip install -e .
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Requires [Ollama](https://ollama.com) with an embed model:
|
|
78
|
+
```bash
|
|
79
|
+
ollama pull nomic-embed-text
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Quick start
|
|
83
|
+
|
|
84
|
+
Copy the example config and edit it:
|
|
85
|
+
```bash
|
|
86
|
+
cp config.example.yaml config.yaml
|
|
87
|
+
# edit config.yaml — add your downstream servers and paths
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Run standalone:
|
|
91
|
+
```bash
|
|
92
|
+
python -m mcp_router.server
|
|
93
|
+
```
|
|
94
|
+
Server listens on stdio (JSON-RPC).
|
|
95
|
+
|
|
96
|
+
## Connect to Claude Code
|
|
97
|
+
|
|
98
|
+
In `~/.claude.json` → `projects["<path>"].mcpServers`:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
"sieve": {
|
|
102
|
+
"type": "stdio",
|
|
103
|
+
"command": "uvx",
|
|
104
|
+
"args": ["--from", "/path/to/mcp-sieve", "mcp-sieve"],
|
|
105
|
+
"env": {
|
|
106
|
+
"MCP_ROUTER_CONFIG": "/path/to/mcp-sieve/config.yaml"
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Or via CLI:
|
|
112
|
+
```bash
|
|
113
|
+
claude mcp add sieve -- uvx --from /path/to/mcp-sieve mcp-sieve
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
> **Windows:** `MCP_ROUTER_CONFIG` is required — `uvx` installs the package into an isolated venv, `__file__` points into uv-cache. See [Windows notes](#windows-notes) below.
|
|
117
|
+
|
|
118
|
+
## Connect to Hermes
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
hermes mcp add sieve --command uvx --args "--from" --args "/path/to/mcp-sieve" --args "mcp-sieve"
|
|
122
|
+
hermes mcp test sieve
|
|
123
|
+
# /reset in chat
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Config
|
|
127
|
+
|
|
128
|
+
`config.yaml` (see `config.example.yaml` for a full template):
|
|
129
|
+
|
|
130
|
+
```yaml
|
|
131
|
+
downstream:
|
|
132
|
+
- name: time
|
|
133
|
+
command: uvx
|
|
134
|
+
args: ["mcp-server-time"]
|
|
135
|
+
|
|
136
|
+
- name: fetch
|
|
137
|
+
command: uvx
|
|
138
|
+
args: ["mcp-server-fetch"]
|
|
139
|
+
|
|
140
|
+
- name: git
|
|
141
|
+
command: uvx
|
|
142
|
+
args: ["mcp-server-git", "--repository", "/path/to/your/repo"]
|
|
143
|
+
|
|
144
|
+
# Windows: npx is a .cmd file — needs cmd /c
|
|
145
|
+
- name: context7
|
|
146
|
+
command: cmd
|
|
147
|
+
args: ["/c", "npx", "-y", "@upstash/context7-mcp@latest"]
|
|
148
|
+
|
|
149
|
+
- name: filesystem
|
|
150
|
+
command: cmd
|
|
151
|
+
args: ["/c", "npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"]
|
|
152
|
+
|
|
153
|
+
# Remote MCP over HTTP (streamable) or SSE — no local process.
|
|
154
|
+
# transport defaults to stdio; a bare url implies http.
|
|
155
|
+
- name: gitmcp
|
|
156
|
+
transport: http
|
|
157
|
+
url: "https://gitmcp.io/docs"
|
|
158
|
+
|
|
159
|
+
embeddings:
|
|
160
|
+
ollama_url: "http://127.0.0.1:11434/api/embeddings"
|
|
161
|
+
model: "nomic-embed-text"
|
|
162
|
+
top_n: 10
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Env variables:
|
|
166
|
+
- `MCP_ROUTER_CONFIG` — path to `config.yaml` (otherwise looks in CWD or next to source)
|
|
167
|
+
- `MCP_SIEVE_DOWNSTREAM_<N>_NAME` / `_COMMAND` / `_ARGS` / `_URL` / `_TRANSPORT` — define downstream servers without a file (Docker/k8s). `N` starts at 1, stops at the first gap. `_ARGS` is a JSON array or whitespace-split. A same-named entry overrides the yaml one.
|
|
168
|
+
- `MCP_SIEVE_OLLAMA_URL` / `MCP_SIEVE_EMBED_MODEL` / `MCP_SIEVE_TOP_N` — embeddings overrides
|
|
169
|
+
|
|
170
|
+
Crashed downstream servers (Ollama, npx) auto-reconnect with exponential backoff — no restart needed.
|
|
171
|
+
|
|
172
|
+
## Windows notes
|
|
173
|
+
|
|
174
|
+
1. **npx → `cmd /c npx`:** `npx` is a `.cmd` file, Python subprocess (MCP SDK) can't find it without a shell. `uvx` is a real binary, works directly.
|
|
175
|
+
|
|
176
|
+
2. **uvx --from and dependencies:** `uvx --from <project>` installs the package into an isolated uv-cache venv. All imports must be in `pyproject.toml` `[project.dependencies]` — implicit deps from the dev env won't be picked up.
|
|
177
|
+
|
|
178
|
+
3. **uv cache clean:** if the cache is locked (`os error 32`), kill MCP server processes first:
|
|
179
|
+
```bash
|
|
180
|
+
powershell -Command "Get-Process | Where-Object { $_.ProcessName -match 'mcp|uv' } | Stop-Process -Force"
|
|
181
|
+
uv cache clean --force
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
4. **Debug connection failures:** `claude --debug` writes to `~/.claude/debug/<session>.txt`. Grep `Server stderr:` for real server tracebacks.
|
|
185
|
+
|
|
186
|
+
## Stack
|
|
187
|
+
|
|
188
|
+
- **MCP Python SDK** (`mcp`) — stdio + HTTP/SSE transports, `notifications/tools/list_changed`
|
|
189
|
+
- **Ollama** — local embeddings (`nomic-embed-text`), free
|
|
190
|
+
- **numpy** — cosine similarity
|
|
191
|
+
- **httpx** — HTTP client for Ollama API
|
|
192
|
+
|
|
193
|
+
## Fallback
|
|
194
|
+
|
|
195
|
+
If Ollama is unavailable — the sieve doesn't crash. `mcp_router_select` returns **all** downstream tools without ranking, with a warning in the response.
|
|
196
|
+
|
|
197
|
+
## Performance
|
|
198
|
+
|
|
199
|
+
Tested with 9 downstream servers (74 tools):
|
|
200
|
+
- `mcp_router_select`: 83–166ms
|
|
201
|
+
- `mcp_router_call`: 15–774ms (longest: playwright browser navigation)
|
|
202
|
+
- Startup: ~16s (all 9 downstream connect + 74 embeddings built)
|
|
203
|
+
|
|
204
|
+
## Status
|
|
205
|
+
|
|
206
|
+
Working end-to-end in Claude Code and Hermes. See `TASKS.md` for the roadmap and benchmark results.
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# MCP Sieve
|
|
2
|
+
|
|
3
|
+
A semantic proxy for MCP servers. Solves **tool selection degradation** — when an LLM has too many tools, it picks the wrong ones.
|
|
4
|
+
|
|
5
|
+
The sieve sits between the client (Claude Code, Hermes, any MCP client) and downstream MCP servers. The client sees **2 tools** instead of dozens: `mcp_router_select` + `mcp_router_call`. The first finds relevant tools via embeddings, the second proxies the call.
|
|
6
|
+
|
|
7
|
+
## How it works
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
Client (Claude Code / Hermes)
|
|
11
|
+
↓ sees only 2 tools
|
|
12
|
+
mcp_router_select(task="...") → embeddings → top-N relevant tools
|
|
13
|
+
mcp_router_call(tool_name, arguments) → proxies to downstream
|
|
14
|
+
↓
|
|
15
|
+
downstream MCP servers (time, fetch, git, arxiv, playwright, ...)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**Two call paths:**
|
|
19
|
+
|
|
20
|
+
1. **Path 1 (notifications/tools/list_changed):** `select` finds tools → sieve updates `tools/list` → client calls tools directly. Works with clients that support dynamic toolset updates.
|
|
21
|
+
|
|
22
|
+
2. **Path 2 (mcp_router_call proxy):** for clients with a **frozen toolset** (Hermes, Claude Code with prompt caching). `select` returns tools with `inputSchema` → `call` proxies execution. No `/reset` needed when new downstream tools are discovered.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
git clone https://github.com/dimarch0x/mcp-sieve ~/Projects/mcp-sieve
|
|
28
|
+
cd ~/Projects/mcp-sieve
|
|
29
|
+
uv pip install -e .
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Requires [Ollama](https://ollama.com) with an embed model:
|
|
33
|
+
```bash
|
|
34
|
+
ollama pull nomic-embed-text
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quick start
|
|
38
|
+
|
|
39
|
+
Copy the example config and edit it:
|
|
40
|
+
```bash
|
|
41
|
+
cp config.example.yaml config.yaml
|
|
42
|
+
# edit config.yaml — add your downstream servers and paths
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Run standalone:
|
|
46
|
+
```bash
|
|
47
|
+
python -m mcp_router.server
|
|
48
|
+
```
|
|
49
|
+
Server listens on stdio (JSON-RPC).
|
|
50
|
+
|
|
51
|
+
## Connect to Claude Code
|
|
52
|
+
|
|
53
|
+
In `~/.claude.json` → `projects["<path>"].mcpServers`:
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
"sieve": {
|
|
57
|
+
"type": "stdio",
|
|
58
|
+
"command": "uvx",
|
|
59
|
+
"args": ["--from", "/path/to/mcp-sieve", "mcp-sieve"],
|
|
60
|
+
"env": {
|
|
61
|
+
"MCP_ROUTER_CONFIG": "/path/to/mcp-sieve/config.yaml"
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Or via CLI:
|
|
67
|
+
```bash
|
|
68
|
+
claude mcp add sieve -- uvx --from /path/to/mcp-sieve mcp-sieve
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
> **Windows:** `MCP_ROUTER_CONFIG` is required — `uvx` installs the package into an isolated venv, `__file__` points into uv-cache. See [Windows notes](#windows-notes) below.
|
|
72
|
+
|
|
73
|
+
## Connect to Hermes
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
hermes mcp add sieve --command uvx --args "--from" --args "/path/to/mcp-sieve" --args "mcp-sieve"
|
|
77
|
+
hermes mcp test sieve
|
|
78
|
+
# /reset in chat
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Config
|
|
82
|
+
|
|
83
|
+
`config.yaml` (see `config.example.yaml` for a full template):
|
|
84
|
+
|
|
85
|
+
```yaml
|
|
86
|
+
downstream:
|
|
87
|
+
- name: time
|
|
88
|
+
command: uvx
|
|
89
|
+
args: ["mcp-server-time"]
|
|
90
|
+
|
|
91
|
+
- name: fetch
|
|
92
|
+
command: uvx
|
|
93
|
+
args: ["mcp-server-fetch"]
|
|
94
|
+
|
|
95
|
+
- name: git
|
|
96
|
+
command: uvx
|
|
97
|
+
args: ["mcp-server-git", "--repository", "/path/to/your/repo"]
|
|
98
|
+
|
|
99
|
+
# Windows: npx is a .cmd file — needs cmd /c
|
|
100
|
+
- name: context7
|
|
101
|
+
command: cmd
|
|
102
|
+
args: ["/c", "npx", "-y", "@upstash/context7-mcp@latest"]
|
|
103
|
+
|
|
104
|
+
- name: filesystem
|
|
105
|
+
command: cmd
|
|
106
|
+
args: ["/c", "npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"]
|
|
107
|
+
|
|
108
|
+
# Remote MCP over HTTP (streamable) or SSE — no local process.
|
|
109
|
+
# transport defaults to stdio; a bare url implies http.
|
|
110
|
+
- name: gitmcp
|
|
111
|
+
transport: http
|
|
112
|
+
url: "https://gitmcp.io/docs"
|
|
113
|
+
|
|
114
|
+
embeddings:
|
|
115
|
+
ollama_url: "http://127.0.0.1:11434/api/embeddings"
|
|
116
|
+
model: "nomic-embed-text"
|
|
117
|
+
top_n: 10
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Env variables:
|
|
121
|
+
- `MCP_ROUTER_CONFIG` — path to `config.yaml` (otherwise looks in CWD or next to source)
|
|
122
|
+
- `MCP_SIEVE_DOWNSTREAM_<N>_NAME` / `_COMMAND` / `_ARGS` / `_URL` / `_TRANSPORT` — define downstream servers without a file (Docker/k8s). `N` starts at 1, stops at the first gap. `_ARGS` is a JSON array or whitespace-split. A same-named entry overrides the yaml one.
|
|
123
|
+
- `MCP_SIEVE_OLLAMA_URL` / `MCP_SIEVE_EMBED_MODEL` / `MCP_SIEVE_TOP_N` — embeddings overrides
|
|
124
|
+
|
|
125
|
+
Crashed downstream servers (Ollama, npx) auto-reconnect with exponential backoff — no restart needed.
|
|
126
|
+
|
|
127
|
+
## Windows notes
|
|
128
|
+
|
|
129
|
+
1. **npx → `cmd /c npx`:** `npx` is a `.cmd` file, Python subprocess (MCP SDK) can't find it without a shell. `uvx` is a real binary, works directly.
|
|
130
|
+
|
|
131
|
+
2. **uvx --from and dependencies:** `uvx --from <project>` installs the package into an isolated uv-cache venv. All imports must be in `pyproject.toml` `[project.dependencies]` — implicit deps from the dev env won't be picked up.
|
|
132
|
+
|
|
133
|
+
3. **uv cache clean:** if the cache is locked (`os error 32`), kill MCP server processes first:
|
|
134
|
+
```bash
|
|
135
|
+
powershell -Command "Get-Process | Where-Object { $_.ProcessName -match 'mcp|uv' } | Stop-Process -Force"
|
|
136
|
+
uv cache clean --force
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
4. **Debug connection failures:** `claude --debug` writes to `~/.claude/debug/<session>.txt`. Grep `Server stderr:` for real server tracebacks.
|
|
140
|
+
|
|
141
|
+
## Stack
|
|
142
|
+
|
|
143
|
+
- **MCP Python SDK** (`mcp`) — stdio + HTTP/SSE transports, `notifications/tools/list_changed`
|
|
144
|
+
- **Ollama** — local embeddings (`nomic-embed-text`), free
|
|
145
|
+
- **numpy** — cosine similarity
|
|
146
|
+
- **httpx** — HTTP client for Ollama API
|
|
147
|
+
|
|
148
|
+
## Fallback
|
|
149
|
+
|
|
150
|
+
If Ollama is unavailable — the sieve doesn't crash. `mcp_router_select` returns **all** downstream tools without ranking, with a warning in the response.
|
|
151
|
+
|
|
152
|
+
## Performance
|
|
153
|
+
|
|
154
|
+
Tested with 9 downstream servers (74 tools):
|
|
155
|
+
- `mcp_router_select`: 83–166ms
|
|
156
|
+
- `mcp_router_call`: 15–774ms (longest: playwright browser navigation)
|
|
157
|
+
- Startup: ~16s (all 9 downstream connect + 74 embeddings built)
|
|
158
|
+
|
|
159
|
+
## Status
|
|
160
|
+
|
|
161
|
+
Working end-to-end in Claude Code and Hermes. See `TASKS.md` for the roadmap and benchmark results.
|
mcp_sieve-0.1.0/TASKS.md
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Roadmap — MCP Sieve
|
|
2
|
+
|
|
3
|
+
## Completed
|
|
4
|
+
|
|
5
|
+
- **Core MCP server** — stdio transport, `initialize`/`tools/list`/`tools/call`, `listChanged` capability
|
|
6
|
+
- **Downstream discovery** — connects to all downstream MCP servers from `config.yaml`, pulls tools via `list_tools()`, one failing server doesn't crash the rest
|
|
7
|
+
- **Semantic search** — Ollama embeddings (`nomic-embed-text`), cosine similarity, top-N ranking. Fallback: if Ollama is down, returns all tools with a warning
|
|
8
|
+
- **Call routing** — `tools/call` proxied to the correct downstream. Unknown tools → clear error
|
|
9
|
+
- **`mcp_router_call` proxy** — for clients with frozen toolsets (prompt caching). `select` returns tools with `inputSchema`, `call` proxies execution. No `/reset` needed on new downstream tools
|
|
10
|
+
- **Cross-platform** — tested on Windows (npx via `cmd /c`, config discovery via `MCP_ROUTER_CONFIG` env). Works with Claude Code and Hermes
|
|
11
|
+
- **HTTP/SSE downstream transport** — `transport: http|sse` + `url` in config connects remote MCP (GitMCP, Cloudflare Remote MCP). stdio stays the default
|
|
12
|
+
- **Env-var config** — `MCP_SIEVE_DOWNSTREAM_<N>_*` and embeddings overrides, for Docker/k8s with no config file. Merges over yaml by name
|
|
13
|
+
- **Auto-reconnect** — per-downstream supervisor tasks with `send_ping` liveness + exponential backoff. A crashed downstream (Ollama, npx) self-heals instead of being lost until restart
|
|
14
|
+
|
|
15
|
+
## Benchmark
|
|
16
|
+
|
|
17
|
+
Tested with **10 downstream servers (79 tools)** in Claude Code across 5 long tasks:
|
|
18
|
+
|
|
19
|
+
| Metric | Result |
|
|
20
|
+
|--------|--------|
|
|
21
|
+
| `mcp_router_select` latency | 83–166ms |
|
|
22
|
+
| `mcp_router_call` latency | 15–774ms |
|
|
23
|
+
| Startup (10 servers + 79 embeddings) | ~16–18s |
|
|
24
|
+
| Tools in LLM system prompt | 2 (always) |
|
|
25
|
+
| Router accuracy (relevant tool in top-10) | 100% across test tasks |
|
|
26
|
+
| Remote HTTP transport | verified (GitMCP via streamable_http) |
|
|
27
|
+
|
|
28
|
+
**Downstream servers tested:** time, fetch, git, arxiv, context7, filesystem, playwright, sequential-thinking, memory, gitmcp (HTTP)
|
|
29
|
+
|
|
30
|
+
**Observation:** Claude Code prefers native tools when they overlap (Bash vs git MCP, Write vs filesystem MCP). The router is most valuable for tools without native equivalents — arxiv, playwright, memory, context7.
|
|
31
|
+
|
|
32
|
+
## Planned
|
|
33
|
+
|
|
34
|
+
- FAISS for >1000 tools (currently numpy cosine sim) — deferred; numpy is fine below ~1000 tools
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# MCP Sieve configuration
|
|
2
|
+
# Copy this file to config.yaml and edit for your setup.
|
|
3
|
+
# Real config.yaml is gitignored — it contains your local paths.
|
|
4
|
+
|
|
5
|
+
# Downstream MCP servers that the router will aggregate.
|
|
6
|
+
# Each entry spawns a stdio MCP server process, unless it sets `url`/`transport` (remote HTTP/SSE, see below).
|
|
7
|
+
downstream:
|
|
8
|
+
- name: time
|
|
9
|
+
command: uvx
|
|
10
|
+
args: ["mcp-server-time"]
|
|
11
|
+
|
|
12
|
+
- name: fetch
|
|
13
|
+
command: uvx
|
|
14
|
+
args: ["mcp-server-fetch"]
|
|
15
|
+
|
|
16
|
+
- name: git
|
|
17
|
+
command: uvx
|
|
18
|
+
args: ["mcp-server-git", "--repository", "/path/to/your/repo"]
|
|
19
|
+
|
|
20
|
+
- name: arxiv
|
|
21
|
+
command: uvx
|
|
22
|
+
args: ["arxiv-mcp-server"]
|
|
23
|
+
|
|
24
|
+
# Windows: npx is a .cmd file — Python subprocess can't find it without a shell.
|
|
25
|
+
# Use "cmd /c npx" instead of just "npx".
|
|
26
|
+
- name: context7
|
|
27
|
+
command: cmd
|
|
28
|
+
args: ["/c", "npx", "-y", "@upstash/context7-mcp@latest"]
|
|
29
|
+
|
|
30
|
+
- name: filesystem
|
|
31
|
+
command: cmd
|
|
32
|
+
args: ["/c", "npx", "-y", "@modelcontextprotocol/server-filesystem", "/path/to/allowed/dir"]
|
|
33
|
+
|
|
34
|
+
- name: playwright
|
|
35
|
+
command: cmd
|
|
36
|
+
args: ["/c", "npx", "-y", "@playwright/mcp@latest"]
|
|
37
|
+
|
|
38
|
+
- name: sequential-thinking
|
|
39
|
+
command: cmd
|
|
40
|
+
args: ["/c", "npx", "-y", "@modelcontextprotocol/server-sequential-thinking"]
|
|
41
|
+
|
|
42
|
+
- name: memory
|
|
43
|
+
command: cmd
|
|
44
|
+
args: ["/c", "npx", "-y", "@modelcontextprotocol/server-memory"]
|
|
45
|
+
|
|
46
|
+
# Remote MCP over HTTP (streamable) or SSE — no local process spawned.
|
|
47
|
+
# `transport` defaults to stdio; a bare `url` implies http.
|
|
48
|
+
- name: gitmcp
|
|
49
|
+
transport: http # or: sse
|
|
50
|
+
url: "https://gitmcp.io/docs"
|
|
51
|
+
|
|
52
|
+
# Env-var config (Docker/k8s, no file): MCP_SIEVE_DOWNSTREAM_1_NAME,
|
|
53
|
+
# _COMMAND, _ARGS (JSON array or whitespace-split), _URL, _TRANSPORT.
|
|
54
|
+
# N starts at 1 and stops at the first gap. Same-named env entry overrides yaml.
|
|
55
|
+
# Also: MCP_SIEVE_OLLAMA_URL, MCP_SIEVE_EMBED_MODEL, MCP_SIEVE_TOP_N.
|
|
56
|
+
|
|
57
|
+
# Embeddings settings (Ollama)
|
|
58
|
+
embeddings:
|
|
59
|
+
ollama_url: "http://127.0.0.1:11434/api/embeddings"
|
|
60
|
+
model: "nomic-embed-text"
|
|
61
|
+
top_n: 10 # how many relevant tools to return per select call
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mcp-sieve"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP Sieve — semantic tool selection proxy for LLMs. Aggregates downstream MCP servers into 2 static tools (select + call) via Ollama embeddings."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { file = "LICENSE" }
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
authors = [{ name = "dimarch0x" }]
|
|
9
|
+
keywords = ["mcp", "mcp-server", "semantic-routing", "llm", "ollama", "tool-selection"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
"Programming Language :: Python :: 3.11",
|
|
15
|
+
"Programming Language :: Python :: 3.12",
|
|
16
|
+
"Programming Language :: Python :: 3.13",
|
|
17
|
+
"Topic :: Software Development :: Libraries",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"mcp>=1.0.0",
|
|
21
|
+
"httpx>=0.27.0",
|
|
22
|
+
"numpy>=1.26.0",
|
|
23
|
+
"pyyaml>=6.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/dimarch0x/mcp-sieve"
|
|
28
|
+
Repository = "https://github.com/dimarch0x/mcp-sieve"
|
|
29
|
+
Issues = "https://github.com/dimarch0x/mcp-sieve/issues"
|
|
30
|
+
|
|
31
|
+
[project.scripts]
|
|
32
|
+
mcp-sieve = "mcp_router.server:main"
|
|
33
|
+
|
|
34
|
+
[build-system]
|
|
35
|
+
requires = ["hatchling"]
|
|
36
|
+
build-backend = "hatchling.build"
|
|
37
|
+
|
|
38
|
+
[tool.uv]
|
|
39
|
+
package = true
|
|
40
|
+
|
|
41
|
+
[tool.hatch.build.targets.wheel]
|
|
42
|
+
packages = ["src/mcp_router"]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Smoke test: send JSON-RPC requests to mcp-sieve via stdio.
|
|
3
|
+
# Checks: initialize → tools/list → tools/call(mcp_router_select)
|
|
4
|
+
|
|
5
|
+
set -e
|
|
6
|
+
cd "$(dirname "$0")/.."
|
|
7
|
+
|
|
8
|
+
# JSON-RPC initialize
|
|
9
|
+
INIT='{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"test","version":"0.1"}}}'
|
|
10
|
+
# initialized notification
|
|
11
|
+
INIT_DONE='{"jsonrpc":"2.0","method":"notifications/initialized"}'
|
|
12
|
+
# tools/list
|
|
13
|
+
LIST='{"jsonrpc":"2.0","id":2,"method":"tools/list"}'
|
|
14
|
+
# tools/call mcp_router_select
|
|
15
|
+
CALL='{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"mcp_router_select","arguments":{"task":"get current time in UTC"}}}'
|
|
16
|
+
|
|
17
|
+
# Pipe all requests, read responses
|
|
18
|
+
printf '%s\n%s\n%s\n%s\n' "$INIT" "$INIT_DONE" "$LIST" "$CALL" | \
|
|
19
|
+
timeout 10 python -m mcp_router.server 2>/dev/null
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""MCP Sieve package."""
|
|
@@ -0,0 +1,482 @@
|
|
|
1
|
+
"""MCP Sieve — semantic tool selection proxy.
|
|
2
|
+
|
|
3
|
+
Path 1 (notifications/tools/list_changed):
|
|
4
|
+
1. tools/list returns 1 tool: mcp_router_select
|
|
5
|
+
2. LLM calls mcp_router_select(task="...") → sieve finds top-N tools
|
|
6
|
+
3. Sieve updates current_tools + sends notifications/tools/list_changed
|
|
7
|
+
4. tools/list now returns the relevant tools
|
|
8
|
+
5. LLM calls a tool directly → sieve proxies to downstream
|
|
9
|
+
|
|
10
|
+
Path 2 (mcp_router_call):
|
|
11
|
+
For clients with a frozen toolset (e.g. Hermes with prompt caching):
|
|
12
|
+
1. LLM calls mcp_router_select(task="...") → gets a list of suitable tools with their inputSchema.
|
|
13
|
+
2. LLM calls mcp_router_call(tool_name="...", arguments={...}) to execute the chosen tool.
|
|
14
|
+
|
|
15
|
+
ponytail: one file, everything in it.
|
|
16
|
+
"""
|
|
17
|
+
import asyncio
|
|
18
|
+
import contextlib
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import NamedTuple
|
|
24
|
+
|
|
25
|
+
import httpx
|
|
26
|
+
import numpy as np
|
|
27
|
+
import yaml
|
|
28
|
+
from mcp import ClientSession, StdioServerParameters
|
|
29
|
+
from mcp.client.sse import sse_client
|
|
30
|
+
from mcp.client.stdio import stdio_client
|
|
31
|
+
from mcp.client.streamable_http import streamablehttp_client
|
|
32
|
+
from mcp.server import NotificationOptions, Server
|
|
33
|
+
from mcp.server.stdio import stdio_server
|
|
34
|
+
from mcp.types import CallToolResult, EmbeddedResource, ImageContent, TextContent, Tool
|
|
35
|
+
|
|
36
|
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
|
|
37
|
+
log = logging.getLogger("mcp-sieve")
|
|
38
|
+
|
|
39
|
+
# --- Config ------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
# ponytail: when launched via uvx --from <project>, the package lives in uv-cache,
|
|
42
|
+
# __file__ points there. Look for config.yaml: (1) env MCP_ROUTER_CONFIG,
|
|
43
|
+
# (2) config.yaml in the CWD, (3) fallback to the old path relative to source.
|
|
44
|
+
def _find_config() -> Path:
    """Locate ``config.yaml`` and return its path.

    Candidates are tried in this order:

    1. The path given in the ``MCP_ROUTER_CONFIG`` env var (``~`` expanded),
       if that path exists.
    2. ``config.yaml`` in the current working directory, if it exists.
    3. ``config.yaml`` two directories above the one containing this source
       file.

    The last candidate is returned without an existence check, so the
    returned path may point at a file that is not there.
    """
    env = os.environ.get("MCP_ROUTER_CONFIG")
    if env:
        p = Path(env).expanduser()
        if p.exists():
            return p
        # An explicit override that does not resolve is almost always a typo
        # or a bad mount; say so instead of silently loading another config.
        logging.getLogger("mcp-sieve").warning(
            "MCP_ROUTER_CONFIG=%s does not exist, falling back to default locations", p
        )
    cwd_cfg = Path.cwd() / "config.yaml"
    if cwd_cfg.exists():
        return cwd_cfg
    return Path(__file__).resolve().parent.parent.parent / "config.yaml"
|
|
55
|
+
|
|
56
|
+
CONFIG_PATH = _find_config()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _env_downstream() -> list[dict]:
    """Read MCP_SIEVE_DOWNSTREAM_<N>_* env vars into downstream entries.

    ponytail: for Docker/k8s where mounting a file is a pain. N starts at 1,
    stops at the first gap. _ARGS is a JSON array (for quoting/backslashes) or
    plain whitespace-split. _URL/_TRANSPORT enable HTTP/SSE downstream.

    Returns:
        One dict per numbered entry, in order of N. Each has ``name`` and,
        when the matching variable is set and non-empty, ``command``,
        ``url``, ``transport`` and ``args`` (a list of strings).

    Raises:
        ValueError: an ``_ARGS`` value starts with ``[`` but is not valid
            JSON. The message names the offending variable.
    """
    out: list[dict] = []
    n = 1
    while True:
        prefix = f"MCP_SIEVE_DOWNSTREAM_{n}_"
        name = os.environ.get(prefix + "NAME")
        if not name:
            # first missing/empty _NAME ends the scan; higher N are ignored
            break
        entry: dict = {"name": name}
        for key, field in (("COMMAND", "command"), ("URL", "url"), ("TRANSPORT", "transport")):
            val = os.environ.get(prefix + key)
            if val:
                entry[field] = val
        raw_args = os.environ.get(prefix + "ARGS")
        if raw_args:
            if raw_args.lstrip().startswith("["):
                try:
                    parsed = json.loads(raw_args)
                except json.JSONDecodeError as exc:
                    # JSONDecodeError is a ValueError subclass, so callers
                    # see the same exception type, now naming the variable.
                    raise ValueError(f"{prefix}ARGS is not a valid JSON array: {exc}") from exc
                # Keep args a list of strings: JSON numbers/booleans are
                # rendered back to their JSON text (8080 -> "8080").
                entry["args"] = [a if isinstance(a, str) else json.dumps(a) for a in parsed]
            else:
                entry["args"] = raw_args.split()
        out.append(entry)
        n += 1
    return out
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _apply_env(cfg: dict) -> dict:
    """Merge env-var overrides into a loaded config dict.

    ``cfg`` is modified in place and also returned.

    * Downstream entries from ``_env_downstream()`` are merged by ``name``:
      an env entry replaces a same-named existing entry, otherwise it is
      appended.
    * ``MCP_SIEVE_OLLAMA_URL`` / ``MCP_SIEVE_EMBED_MODEL`` /
      ``MCP_SIEVE_TOP_N`` override ``embeddings.ollama_url`` / ``model`` /
      ``top_n`` when set and non-empty.

    After the call ``cfg["downstream"]`` is always a list and
    ``cfg["embeddings"]`` always a dict, even if the input had those keys
    missing or set to ``None``.

    Raises:
        ValueError: ``MCP_SIEVE_TOP_N`` is set but is not an integer.
    """
    # A key written with no value in YAML (e.g. a bare `downstream:`) loads
    # as None; normalise so later code can iterate / index safely.
    downstream = cfg.get("downstream") or []
    cfg["downstream"] = downstream
    env_ds = _env_downstream()
    if env_ds:
        # name-keyed merge: env entry replaces a same-named yaml entry, else appends
        by_name = {d["name"]: d for d in downstream}
        for d in env_ds:
            by_name[d["name"]] = d
        cfg["downstream"] = list(by_name.values())

    # setdefault() would hand back None for `embeddings:` with a null value,
    # and the item assignments below would then fail.
    emb = cfg.get("embeddings") or {}
    cfg["embeddings"] = emb
    for key, field in (("OLLAMA_URL", "ollama_url"), ("EMBED_MODEL", "model")):
        val = os.environ.get(f"MCP_SIEVE_{key}")
        if val:
            emb[field] = val
    raw_top_n = os.environ.get("MCP_SIEVE_TOP_N")
    if raw_top_n:
        try:
            emb["top_n"] = int(raw_top_n)
        except ValueError as exc:
            raise ValueError(f"MCP_SIEVE_TOP_N must be an integer, got {raw_top_n!r}") from exc
    return cfg
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def load_config() -> dict:
    """Build the effective configuration from CONFIG_PATH plus env overrides.

    When the file exists it is parsed with ``yaml.safe_load`` (an empty file
    counts as an empty mapping); otherwise an empty skeleton is used. Either way
    the result is passed through ``_apply_env`` before being returned.
    """
    if CONFIG_PATH.exists():
        log.info("loading config: %s", CONFIG_PATH)
        raw_yaml = CONFIG_PATH.read_text(encoding="utf-8")
        loaded = yaml.safe_load(raw_yaml) or {}
        return _apply_env(loaded)

    log.info("config.yaml not found (%s), relying on env vars", CONFIG_PATH)
    skeleton = {"downstream": [], "embeddings": {}}
    return _apply_env(skeleton)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Effective configuration, resolved once at import time (YAML file + env overrides).
CFG = load_config()
# `or {}` / `or []` rather than a .get() default: a key that is present in the
# YAML with an empty value loads as None, which a .get() default does not replace.
EMBED_CFG = CFG.get("embeddings") or {}
OLLAMA_URL = EMBED_CFG.get("ollama_url", "http://127.0.0.1:11434/api/embeddings")
EMBED_MODEL = EMBED_CFG.get("model", "nomic-embed-text")
# how many top-scoring tools a select call exposes
TOP_N = EMBED_CFG.get("top_n", 10)
DOWNSTREAM = CFG.get("downstream") or []
|
|
121
|
+
|
|
122
|
+
# --- State -------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class RegisteredTool(NamedTuple):
    """A downstream tool as tracked in ``tool_registry``."""

    # name of the downstream server that provides the tool
    downstream: str
    # the tool's own name as reported by that downstream (no namespace prefix)
    orig_name: str
    # the Tool object exposed by the router, carrying the namespaced name
    tool: Tool
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Namespaced tool name ("<downstream>_<tool>") -> its RegisteredTool record.
tool_registry: dict[str, RegisteredTool] = {}
# Namespaced tool name -> embedding vector (np.ndarray) for that tool.
embeddings_cache: dict[str, np.ndarray] = {}
# Downstream name -> its ClientSession; entries are added and removed by the supervisors.
downstream_sessions: dict[str, ClientSession] = {}
# The tool list currently served by tools/list.
current_tools: list[Tool] = []
|
|
139
|
+
|
|
140
|
+
# --- Embedding ---------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
# Shared HTTP client for embedding requests; created on first use by embed().
_embed_client: httpx.AsyncClient | None = None


async def embed(text: str) -> np.ndarray:
    """Fetch the embedding of ``text`` from the configured Ollama endpoint.

    POSTs ``{"model": EMBED_MODEL, "prompt": text}`` to OLLAMA_URL and returns
    the response's ``"embedding"`` field as a float32 array. This function does
    no caching itself; callers keep results in the in-memory
    ``embeddings_cache`` (ponytail: in-memory cache, no DB).

    Raises whatever ``raise_for_status`` raises on an error response, and
    KeyError if the response body has no ``"embedding"`` key.
    """
    global _embed_client
    client = _embed_client
    if client is None:
        client = _embed_client = httpx.AsyncClient(timeout=30)
    request_body = {"model": EMBED_MODEL, "prompt": text}
    response = await client.post(OLLAMA_URL, json=request_body)
    response.raise_for_status()
    vector = response.json()["embedding"]
    return np.array(vector, dtype=np.float32)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of two vectors as a plain float.

    A small epsilon is added to the denominator so that a zero vector yields
    0.0 instead of a division by zero.
    """
    # ponytail: numpy — fine up to 1000 tools; beyond that use FAISS.
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    similarity = np.dot(a, b) / (norm_product + 1e-8)
    return float(similarity)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# --- Helpers -----------------------------------------------------------------
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _text(text: str) -> list[TextContent]:
    """Wrap ``text`` in a one-element list of text content."""
    item = TextContent(type="text", text=text)
    return [item]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _tool_dict(t: Tool) -> dict:
    """Serialize a Tool to a dict for select responses (the one place this is done).

    The result has exactly the keys "name", "description" and "inputSchema",
    in that order.
    """
    serialized: dict = {}
    for attr in ("name", "description", "inputSchema"):
        serialized[attr] = getattr(t, attr)
    return serialized
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _resolve(name: str) -> str | None:
    """Map a caller-supplied tool name to a key of ``tool_registry``.

    Tried in order: the name as given; the name with a leading ``mcp_router_``
    removed; a registry key ending in ``_<name>``.

    ponytail: the suffix match is only accepted when it is unique — never
    silently picks the first. 'time' would match '..._get_time', but with more
    than one candidate the result is None (ambiguous).

    Returns:
        The registry key, or None when nothing (or more than one thing) matches.
    """
    router_prefix = "mcp_router_"
    candidates = [name]
    if name.startswith(router_prefix):
        candidates.append(name[len(router_prefix):])
    for candidate in candidates:
        if candidate in tool_registry:
            return candidate

    # the suffix is built from the name as given, not the stripped form
    suffix = f"_{name}"
    hits = [key for key in tool_registry if key.endswith(suffix)]
    if len(hits) != 1:
        return None
    return hits[0]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
async def _invoke(entry: RegisteredTool, args: dict) -> list[TextContent]:
    """Call a registered tool on its downstream session (the single call point).

    Returns the downstream result's content on success. If the downstream has
    no live session a plain-text notice is returned; if the call raises, the
    exception is logged and a JSON ``{"error": ...}`` text is returned instead
    of propagating.
    """
    target = entry.downstream
    session = downstream_sessions.get(target)
    if session is None:
        return _text(f"downstream session {target} not available")
    try:
        result: CallToolResult = await session.call_tool(entry.orig_name, args)
        return result.content
    except Exception as e:
        log.exception("downstream call failed: %s/%s", target, entry.orig_name)
        failure = {"error": f"downstream call failed: {e}"}
        return _text(json.dumps(failure, ensure_ascii=False))
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# --- Downstream discovery ----------------------------------------------------
|
|
202
|
+
|
|
203
|
+
async def _open_transport(ds: dict, exit_stack: contextlib.AsyncExitStack):
    """Open the right transport for a downstream and return (read, write).

    ponytail: transport defaults to stdio; a bare `url` implies http.

    Args:
        ds: downstream entry; needs "name", plus "command" (stdio) or "url"
            (http / streamable-http / sse). "args" and "transport" are optional.
        exit_stack: stack the transport context manager is entered on, so the
            caller controls its teardown.

    Returns:
        The first two items yielded by the transport client (read and write
        streams).

    Raises:
        ValueError: unknown transport, or the entry lacks the "command"/"url"
            its transport needs (reported by name rather than as a bare KeyError).
    """
    name = ds["name"]
    transport = ds.get("transport") or ("http" if ds.get("url") else "stdio")
    if transport == "stdio":
        command = ds.get("command")
        if not command:
            raise ValueError(f"downstream {name}: stdio transport requires 'command'")
        # `args:` with no value loads from YAML as None, hence `or []`
        params = StdioServerParameters(command=command, args=ds.get("args") or [], env=None)
        log.info("connecting downstream %s (stdio): %s %s", name, params.command, params.args)
        streams = await exit_stack.enter_async_context(stdio_client(params))
    elif transport in ("http", "streamable-http", "sse"):
        url = ds.get("url")
        if not url:
            raise ValueError(f"downstream {name}: {transport} transport requires 'url'")
        if transport == "sse":
            log.info("connecting downstream %s (sse): %s", name, url)
            streams = await exit_stack.enter_async_context(sse_client(url))
        else:
            log.info("connecting downstream %s (http): %s", name, url)
            streams = await exit_stack.enter_async_context(streamablehttp_client(url))
    else:
        raise ValueError(f"unknown transport '{transport}' for downstream {name}")
    return streams[0], streams[1]  # streamablehttp yields a 3rd session-id getter, ignore it
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
async def _connect(ds: dict, exit_stack: contextlib.AsyncExitStack) -> ClientSession:
    """Open the transport, start a ClientSession on it and initialize it.

    Both the transport and the session are entered on ``exit_stack``, so closing
    the stack tears them down. Shared by discovery and reconnect.
    """
    streams = await _open_transport(ds, exit_stack)
    read_stream, write_stream = streams
    client = ClientSession(read_stream, write_stream)
    session = await exit_stack.enter_async_context(client)
    await session.initialize()
    return session
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
async def _register_tools(name: str, session: ClientSession) -> None:
    """Pull tools + build embeddings for one downstream. Idempotent across reconnects.

    ponytail: a server's tool set is stable — the tool list is fetched once and
    not re-fetched on reconnect. Embeddings that are still missing (for example
    because the embedding service was unreachable on an earlier connect) are
    retried on every call, so such tools do not stay invisible to semantic select.

    Args:
        name: downstream name; tools are registered as ``<name>_<tool name>``.
        session: initialized session used to list the downstream's tools.

    Failures are logged, never raised: a failed tool listing leaves the registry
    untouched, a failed embedding leaves the remaining tools without vectors.
    """
    already_listed = any(e.downstream == name for e in tool_registry.values())
    if not already_listed:
        try:
            result = await session.list_tools()
        except Exception as e:
            log.error("failed to list tools from downstream %s: %s", name, e)
            return
        for tool in result.tools:
            ns_name = f"{name}_{tool.name}"
            ns_tool = Tool(name=ns_name, description=tool.description, inputSchema=tool.inputSchema)
            tool_registry[ns_name] = RegisteredTool(name, tool.name, ns_tool)

    # snapshot first: the loop below awaits, and other supervisors may add entries meanwhile
    missing = [
        (ns_name, entry)
        for ns_name, entry in tool_registry.items()
        if entry.downstream == name and ns_name not in embeddings_cache
    ]
    if already_listed and not missing:
        return  # reconnect with nothing left to embed
    try:
        for ns_name, entry in missing:
            embeddings_cache[ns_name] = await embed(f"{entry.tool.name}: {entry.tool.description or ''}")
        log.info("embeddings ready for %s (%d total)", name, len(embeddings_cache))
    except Exception as e:
        log.warning("embeddings failed for %s (%s), semantic search degraded", name, e)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# ponytail: fixed backoff cap; make configurable if flapping servers appear.
# All values are in seconds.
RECONNECT_BASE = 1.0  # first reconnect delay; doubled after each failed attempt
RECONNECT_CAP = 30.0  # upper bound for the reconnect delay
HEALTH_INTERVAL = 15.0  # pause between liveness pings of a connected downstream
STARTUP_TIMEOUT = 60.0  # longest startup waits for the first connect attempts
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
async def _supervise(ds: dict, ready: asyncio.Event) -> None:
    """Keep one downstream connected; reconnect with exponential backoff on failure.

    ponytail: each supervisor owns its exit_stack so connect/teardown run in the
    same task — sidesteps anyio 'cancel scope in a different task'. Liveness via
    periodic send_ping; a raised ping means the transport died.

    Args:
        ds: downstream entry; ``ds["name"]`` keys the session in
            ``downstream_sessions``.
        ready: set after the first connect attempt finishes, whether it
            succeeded or failed, so startup never waits on a dead server.

    Runs until cancelled; cancellation is re-raised untouched.
    """
    name = ds["name"]
    backoff = RECONNECT_BASE
    while True:
        try:
            async with contextlib.AsyncExitStack() as stack:
                session = await _connect(ds, stack)
                downstream_sessions[name] = session
                # a successful connect resets the delay for the next outage
                backoff = RECONNECT_BASE
                log.info("downstream %s connected", name)
                await _register_tools(name, session)
                ready.set()
                # health loop: only leaves via an exception (failed ping or cancellation)
                while True:
                    await asyncio.sleep(HEALTH_INTERVAL)
                    await session.send_ping()
        except asyncio.CancelledError:
            # shutdown: propagate without touching downstream_sessions or sleeping
            raise
        except Exception as e:
            # the exit stack has already been closed by the time we get here
            downstream_sessions.pop(name, None)
            ready.set()  # don't hold up startup on a server that won't connect
            log.warning("downstream %s down (%s); reconnecting in %.0fs", name, e, backoff)
            await asyncio.sleep(backoff)
            backoff = min(backoff * 2, RECONNECT_CAP)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# --- MCP Server --------------------------------------------------------------
|
|
294
|
+
|
|
295
|
+
# The MCP server object the handlers below are registered on.
server: Server = Server("mcp-sieve")

# Built-in tool: semantic search over the registered downstream tools.
ROUTER_SELECT_TOOL = Tool(
    name="mcp_router_select",
    description=(
        "REQUIRED first step before any task involving external tools. "
        "Pass your current task description; "
        "returns the relevant tools available for that task. "
        "Always call this first."
    ),
    inputSchema={
        "type": "object",
        "properties": {
            "task": {
                "type": "string",
                "description": "Natural language description of what you want to do.",
            },
        },
        "required": ["task"],
    },
)

# Built-in tool: run a downstream tool by name.
ROUTER_CALL_TOOL = Tool(
    name="mcp_router_call",
    description=(
        "Execute a downstream tool by name. "
        "Use after mcp_router_select to find the right tool, "
        "then call this with the tool_name and arguments from the select response."
    ),
    inputSchema={
        "type": "object",
        "properties": {
            "tool_name": {
                "type": "string",
                "description": "Tool name from mcp_router_select response (e.g. 'time_get_current_time').",
            },
            "arguments": {
                "type": "object",
                "description": "Arguments object matching the tool's inputSchema.",
            },
        },
        "required": ["tool_name", "arguments"],
    },
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Serve tools/list: hand back the module-level ``current_tools`` list as is."""
    return current_tools
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
@server.call_tool()
async def call_tool(name: str, arguments: dict | None) -> list[TextContent] | list[ImageContent] | list[EmbeddedResource]:
    """Tool call routing.

    ponytail: Hermes may pass a prefixed name (mcp_router_time_get_current_time).
    Strip mcp_router_ if the stripped name matches the registry or select.

    Routing:
      * ``mcp_router_select`` → semantic tool search on ``arguments["task"]``.
      * ``mcp_router_call`` → run ``arguments["tool_name"]`` with
        ``arguments["arguments"]``, which may be an object, a JSON string
        encoding an object, or missing/null (treated as no arguments).
      * anything else → treated as a downstream tool name and called directly.

    Every failure is reported as a JSON ``{"error": ...}`` text result rather
    than raised.
    """
    args = arguments or {}

    # ponytail: insurance — if the client passes a prefixed name, strip it
    if name.startswith("mcp_router_"):
        stripped = name[len("mcp_router_"):]
        if stripped in ("mcp_router_select", "mcp_router_call") or stripped in tool_registry:
            name = stripped

    if name == "mcp_router_select":
        return await _handle_select(args.get("task", ""))

    if name == "mcp_router_call":
        tool_name = args.get("tool_name", "")
        tool_args = args.get("arguments", {})
        if isinstance(tool_args, str):
            # some clients send the arguments object as a JSON-encoded string
            try:
                tool_args = json.loads(tool_args)
            except Exception as e:
                return _text(json.dumps({"error": f"failed to parse arguments JSON: {e}"}, ensure_ascii=False))

        if not tool_name:
            return _text(json.dumps({"error": "tool_name is required"}, ensure_ascii=False))

        resolved = _resolve(tool_name)
        if resolved is None:
            return _text(json.dumps({"error": f"unknown tool: {tool_name}"}, ensure_ascii=False))

        # an explicit null means "no arguments"; anything else that is not an
        # object (list, number, ...) is rejected here instead of being forwarded
        if tool_args is None:
            tool_args = {}
        if not isinstance(tool_args, dict):
            return _text(json.dumps(
                {"error": f"arguments must be a JSON object, got {type(tool_args).__name__}"},
                ensure_ascii=False,
            ))
        return await _invoke(tool_registry[resolved], tool_args)

    # Direct downstream call by registry name
    resolved = _resolve(name)
    if resolved is not None:
        return await _invoke(tool_registry[resolved], args)

    return _text(json.dumps({"error": f"unknown tool: {name}"}, ensure_ascii=False))
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _select_all_tools() -> list[TextContent]:
    """Expose every registered tool and build the 'embeddings unavailable' reply."""
    all_ns_tools = [entry.tool for entry in tool_registry.values()]
    _set_current_tools(all_ns_tools)
    return _text(json.dumps({
        "warning": "embeddings unavailable; returning all tools",
        "tools": [_tool_dict(t) for t in all_ns_tools],
        "hint": "Use mcp_router_call(tool_name=<name>, arguments=<args>) to execute any of these tools.",
    }, ensure_ascii=False))


async def _handle_select(task: str) -> list[TextContent]:
    """Core: semantic search for tools matching the task.

    Embeds ``task``, scores it against every cached tool embedding by cosine
    similarity, exposes the TOP_N best tools via ``current_tools`` and sends a
    best-effort tools/list_changed notification.

    Returns a single JSON text result:
      * ``{"error": ...}`` when ``task`` is empty;
      * ``{"info": ..., "available_tools": [...]}`` when nothing is registered;
      * ``{"warning": ..., "tools": [...], "hint": ...}`` with all tools when no
        embeddings exist or the task itself cannot be embedded;
      * ``{"selected_tools": [...], "hint": ...}`` otherwise.
    """
    if not task:
        return _text('{"error": "task is required"}')

    if not tool_registry:
        return _text(json.dumps({
            "info": "no downstream tools registered yet",
            "available_tools": [t.name for t in current_tools],
        }, ensure_ascii=False))

    if not embeddings_cache:
        # no embeddings — return all tools, don't break the flow
        return _select_all_tools()

    try:
        q_vec = await embed(task)
    except Exception as e:
        # embedding service unreachable at query time: same fallback as above
        log.warning("could not embed task (%s); returning all tools", e)
        return _select_all_tools()

    scored: list[tuple[float, Tool]] = []
    for ns_name, vec in embeddings_cache.items():
        entry = tool_registry[ns_name]
        scored.append((cosine_sim(q_vec, vec), entry.tool))
    scored.sort(key=lambda x: x[0], reverse=True)

    top = [t for _, t in scored[:TOP_N]]
    _set_current_tools(top)

    # notify the client (best-effort, only works inside a request context)
    try:
        ctx = server.request_context
        await ctx.session.send_tool_list_changed()
        log.info("sent tools/list_changed, now exposing %d tools", len(top))
    except (LookupError, AttributeError) as e:
        log.warning("cannot send list_changed outside active request: %s", e)
    except Exception as e:
        log.warning("could not send list_changed: %s", e)

    payload = {
        "selected_tools": [_tool_dict(t) for t in top],
        "hint": "Use mcp_router_call(tool_name=<name>, arguments=<args>) to execute any of these tools.",
    }
    return _text(json.dumps(payload, ensure_ascii=False))
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def _set_current_tools(tools: list[Tool]) -> None:
    """Replace ``current_tools`` with the two router tools followed by ``tools``.

    mcp_router_select and mcp_router_call always come first; any item in
    ``tools`` carrying one of those two names is dropped so they are not listed
    twice.
    """
    global current_tools
    reserved_names = ("mcp_router_select", "mcp_router_call")
    exposed = [ROUTER_SELECT_TOOL, ROUTER_CALL_TOOL]
    for candidate in tools:
        if candidate.name in reserved_names:
            continue
        exposed.append(candidate)
    current_tools = exposed
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
# --- Entrypoint --------------------------------------------------------------
|
|
445
|
+
|
|
446
|
+
async def main_async() -> None:
    """Start one supervisor per downstream, then serve MCP over stdio.

    Startup waits (at most STARTUP_TIMEOUT seconds) for each downstream's first
    connect attempt before serving. When the server stops, all supervisor tasks
    are cancelled and awaited.
    """
    _set_current_tools([])

    ready_events = [asyncio.Event() for _ in DOWNSTREAM]
    supervisors = []
    for ds, event in zip(DOWNSTREAM, ready_events):
        task = asyncio.create_task(_supervise(ds, event), name=f"supervise-{ds['name']}")
        supervisors.append(task)

    # Wait for the first connect attempt of each downstream so tools/list isn't
    # empty on the first select — bounded so one slow server can't stall startup.
    if ready_events:
        all_ready = asyncio.gather(*(event.wait() for event in ready_events))
        with contextlib.suppress(asyncio.TimeoutError):
            await asyncio.wait_for(all_ready, timeout=STARTUP_TIMEOUT)

    log.info("starting mcp-sieve on stdio, %d downstream connected, %d tools registered",
             len(downstream_sessions), len(tool_registry))
    try:
        async with stdio_server() as (read, write):
            init_options = server.create_initialization_options(NotificationOptions(tools_changed=True))
            await server.run(read, write, init_options)
    finally:
        for supervisor in supervisors:
            supervisor.cancel()
        await asyncio.gather(*supervisors, return_exceptions=True)
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def main() -> None:
    """Synchronous entry point: run ``main_async`` on a fresh event loop."""
    entry = main_async()
    asyncio.run(entry)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""ponytail self-checks for env config (#2) and reconnect supervisor (#3).
|
|
2
|
+
|
|
3
|
+
Run: python test_transport_env_reconnect.py
|
|
4
|
+
No framework — plain asserts. Fails loudly if the logic breaks.
|
|
5
|
+
"""
|
|
6
|
+
import asyncio
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
sys.path.insert(0, "src")
|
|
11
|
+
import mcp_router.server as s
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_env_downstream():
    """_env_downstream reads numbered entries, stops at the first gap, parses _ARGS both ways."""
    env = {
        "MCP_SIEVE_DOWNSTREAM_1_NAME": "time",
        "MCP_SIEVE_DOWNSTREAM_1_COMMAND": "uvx",
        "MCP_SIEVE_DOWNSTREAM_1_ARGS": "mcp-server-time --local",  # whitespace split
        "MCP_SIEVE_DOWNSTREAM_2_NAME": "remote",
        "MCP_SIEVE_DOWNSTREAM_2_URL": "https://example.com/mcp",
        "MCP_SIEVE_DOWNSTREAM_2_ARGS": '["--flag", "with space"]',  # JSON keeps the quoted arg
        # gap at 3 stops the scan; a stray 4 must be ignored
        "MCP_SIEVE_DOWNSTREAM_4_NAME": "ignored",
    }
    old = dict(os.environ)
    # Drop whatever MCP_SIEVE_* the developer's shell already has: an ambient
    # MCP_SIEVE_DOWNSTREAM_3_NAME would bridge the gap and fail the test for no reason.
    for key in [k for k in os.environ if k.startswith("MCP_SIEVE_")]:
        del os.environ[key]
    os.environ.update(env)
    try:
        ds = s._env_downstream()
    finally:
        os.environ.clear()
        os.environ.update(old)

    assert [d["name"] for d in ds] == ["time", "remote"], ds
    assert ds[0]["args"] == ["mcp-server-time", "--local"], ds[0]
    assert ds[1]["url"] == "https://example.com/mcp"
    assert ds[1]["args"] == ["--flag", "with space"], ds[1]  # JSON preserved the space
    print("ok: _env_downstream")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_apply_env_merge():
    """_apply_env: env entries replace same-named yaml entries, append new ones, set top_n."""
    yaml_cfg = {
        "downstream": [
            {"name": "time", "command": "old"},
            {"name": "git", "command": "git"},
        ]
    }
    overrides = {
        "MCP_SIEVE_DOWNSTREAM_1_NAME": "time",  # same name → replaces
        "MCP_SIEVE_DOWNSTREAM_1_COMMAND": "new",
        "MCP_SIEVE_DOWNSTREAM_2_NAME": "arxiv",  # new name → appends
        "MCP_SIEVE_DOWNSTREAM_2_COMMAND": "uvx",
        "MCP_SIEVE_TOP_N": "5",
    }
    snapshot = dict(os.environ)
    os.environ.update(overrides)
    try:
        merged = s._apply_env(yaml_cfg)
    finally:
        os.environ.clear()
        os.environ.update(snapshot)

    by_name = {}
    for entry in merged["downstream"]:
        by_name[entry["name"]] = entry
    assert by_name["time"]["command"] == "new", by_name["time"]
    assert by_name["git"]["command"] == "git"  # untouched yaml entry survives
    assert by_name["arxiv"]["command"] == "uvx"  # env-only entry added
    assert merged["embeddings"]["top_n"] == 5
    print("ok: _apply_env merge")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_reconnect_supervisor():
    """_supervise must survive a failed first connect and re-heal after the session dies.

    The module attributes patched for the run, and the fake session it leaves in
    ``downstream_sessions``, are restored/removed afterwards so nothing leaks
    into code that runs later in the same process.
    """
    attempts = {"n": 0}

    class FakeSession:
        def __init__(self):
            self.pinged = False

        async def send_ping(self):
            if self.pinged:  # dies on the 2nd ping
                raise ConnectionError("transport gone")
            self.pinged = True

    async def fake_connect(ds, stack):
        attempts["n"] += 1
        if attempts["n"] == 1:
            raise ConnectionError("first attempt fails")
        return FakeSession()

    async def noop_register(name, session):
        return None

    async def run():
        s._connect = fake_connect
        s._register_tools = noop_register
        s.RECONNECT_BASE = s.RECONNECT_CAP = s.HEALTH_INTERVAL = 0.01
        ready = asyncio.Event()
        task = asyncio.create_task(s._supervise({"name": "x"}, ready))
        await asyncio.sleep(0.3)
        task.cancel()
        await asyncio.gather(task, return_exceptions=True)
        assert ready.is_set()  # startup unblocked despite first failure
        assert attempts["n"] >= 3, attempts  # failed once, connected+died, reconnected
        assert "x" in s.downstream_sessions  # healed: a live session is registered

    patched = ("_connect", "_register_tools", "RECONNECT_BASE", "RECONNECT_CAP", "HEALTH_INTERVAL")
    originals = {attr: getattr(s, attr) for attr in patched}
    try:
        asyncio.run(run())
    finally:
        for attr, value in originals.items():
            setattr(s, attr, value)
        s.downstream_sessions.pop("x", None)
    print("ok: _supervise reconnect")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
    # run every self-check in order; the first failing assert aborts the script
    for check in (test_env_downstream, test_apply_env_merge, test_reconnect_supervisor):
        check()
    print("all passed")
|