@aiwerk/mcp-bridge 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -47
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.js +2 -0
- package/dist/src/smart-filter.d.ts +129 -0
- package/dist/src/smart-filter.js +559 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
# @aiwerk/mcp-bridge
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Multiplex multiple MCP servers into one interface. One config, one connection, all your tools.
|
|
4
|
+
|
|
5
|
+
Works with **Claude Desktop**, **Cursor**, **Windsurf**, **Cline**, **OpenClaw**, or any MCP client.
|
|
6
|
+
|
|
7
|
+
## Why?
|
|
8
|
+
|
|
9
|
+
Most AI agents connect to MCP servers one-by-one. With 10+ servers, that's 10+ connections, 200+ tools in context, and thousands of wasted tokens.
|
|
10
|
+
|
|
11
|
+
**MCP Bridge** solves this:
|
|
12
|
+
- **Router mode**: all servers behind one `mcp` meta-tool (~99% token reduction)
|
|
13
|
+
- **Direct mode**: all tools registered individually with automatic prefixing
|
|
14
|
+
- **3 transports**: stdio, SSE, streamable-http
|
|
15
|
+
- **Built-in catalog**: install popular servers with one command
|
|
16
|
+
- **No secrets in config files**: `${ENV_VAR}` placeholders are resolved from `.env`
|
|
4
17
|
|
|
5
18
|
## Install
|
|
6
19
|
|
|
@@ -11,22 +24,62 @@ npm install -g @aiwerk/mcp-bridge
|
|
|
11
24
|
## Quick Start
|
|
12
25
|
|
|
13
26
|
```bash
|
|
14
|
-
# Initialize config
|
|
27
|
+
# 1. Initialize config
|
|
15
28
|
mcp-bridge init
|
|
16
29
|
|
|
17
|
-
#
|
|
18
|
-
vi ~/.mcp-bridge/config.json
|
|
19
|
-
|
|
20
|
-
# Install a server from the catalog
|
|
30
|
+
# 2. Install a server from the catalog
|
|
21
31
|
mcp-bridge install todoist
|
|
22
32
|
|
|
23
|
-
#
|
|
33
|
+
# 3. Add your API key
|
|
34
|
+
echo "TODOIST_API_TOKEN=your-token" >> ~/.mcp-bridge/.env
|
|
35
|
+
|
|
36
|
+
# 4. Start (stdio mode — connects to any MCP client)
|
|
24
37
|
mcp-bridge
|
|
25
38
|
```
|
|
26
39
|
|
|
40
|
+
## Use with Claude Desktop
|
|
41
|
+
|
|
42
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"mcpServers": {
|
|
47
|
+
"bridge": {
|
|
48
|
+
"command": "mcp-bridge",
|
|
49
|
+
"args": []
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Use with Cursor / Windsurf
|
|
56
|
+
|
|
57
|
+
Add to your MCP config:
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"mcpServers": {
|
|
62
|
+
"bridge": {
|
|
63
|
+
"command": "mcp-bridge",
|
|
64
|
+
"args": ["--config", "/path/to/config.json"]
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Use with OpenClaw
|
|
71
|
+
|
|
72
|
+
Install as a plugin (handles everything automatically):
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
openclaw plugins install @aiwerk/openclaw-mcp-bridge
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
See [@aiwerk/openclaw-mcp-bridge](https://github.com/AIWerk/openclaw-mcp-bridge) for details.
|
|
79
|
+
|
|
27
80
|
## Configuration
|
|
28
81
|
|
|
29
|
-
Config
|
|
82
|
+
Config: `~/.mcp-bridge/config.json` | Secrets: `~/.mcp-bridge/.env`
|
|
30
83
|
|
|
31
84
|
```json
|
|
32
85
|
{
|
|
@@ -36,19 +89,22 @@ Config location: `~/.mcp-bridge/config.json`
|
|
|
36
89
|
"transport": "stdio",
|
|
37
90
|
"command": "npx",
|
|
38
91
|
"args": ["-y", "@doist/todoist-ai"],
|
|
39
|
-
"env": {
|
|
40
|
-
"TODOIST_API_KEY": "${TODOIST_API_TOKEN}"
|
|
41
|
-
},
|
|
92
|
+
"env": { "TODOIST_API_KEY": "${TODOIST_API_TOKEN}" },
|
|
42
93
|
"description": "Task management"
|
|
43
94
|
},
|
|
44
95
|
"github": {
|
|
45
96
|
"transport": "stdio",
|
|
46
|
-
"command": "
|
|
47
|
-
"args": ["
|
|
48
|
-
"env": {
|
|
49
|
-
"GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
|
|
50
|
-
},
|
|
97
|
+
"command": "npx",
|
|
98
|
+
"args": ["-y", "@modelcontextprotocol/server-github"],
|
|
99
|
+
"env": { "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" },
|
|
51
100
|
"description": "GitHub repos, issues, PRs"
|
|
101
|
+
},
|
|
102
|
+
"notion": {
|
|
103
|
+
"transport": "stdio",
|
|
104
|
+
"command": "npx",
|
|
105
|
+
"args": ["-y", "@modelcontextprotocol/server-notion"],
|
|
106
|
+
"env": { "NOTION_API_KEY": "${NOTION_TOKEN}" },
|
|
107
|
+
"description": "Notion pages and databases"
|
|
52
108
|
}
|
|
53
109
|
},
|
|
54
110
|
"toolPrefix": true,
|
|
@@ -57,61 +113,123 @@ Config location: `~/.mcp-bridge/config.json`
|
|
|
57
113
|
}
|
|
58
114
|
```
|
|
59
115
|
|
|
60
|
-
|
|
116
|
+
### Modes
|
|
117
|
+
|
|
118
|
+
| Mode | Tools exposed | Best for |
|
|
119
|
+
|------|--------------|----------|
|
|
120
|
+
| `router` (default) | Single `mcp` meta-tool | 3+ servers, token-conscious agents |
|
|
121
|
+
| `direct` | All tools individually | Few servers, simple agents |
|
|
122
|
+
|
|
123
|
+
**Router mode** — the agent calls `mcp(server="todoist", action="list")` to discover, then `mcp(server="todoist", tool="find-tasks", params={...})` to execute.
|
|
124
|
+
|
|
125
|
+
**Direct mode** — tools are registered as `todoist_find_tasks`, `github_list_repos`, etc.
|
|
126
|
+
|
|
127
|
+
### Transports
|
|
128
|
+
|
|
129
|
+
| Transport | Config key | Use case |
|
|
130
|
+
|-----------|-----------|----------|
|
|
131
|
+
| `stdio` | `command`, `args` | Local CLI servers (most common) |
|
|
132
|
+
| `sse` | `url`, `headers` | Remote SSE servers |
|
|
133
|
+
| `streamable-http` | `url`, `headers` | Modern HTTP-based servers |
|
|
134
|
+
|
|
135
|
+
### Environment variables
|
|
136
|
+
|
|
137
|
+
Secrets go in `~/.mcp-bridge/.env` (chmod 600 on init):
|
|
61
138
|
|
|
62
139
|
```
|
|
63
140
|
TODOIST_API_TOKEN=your-token-here
|
|
64
141
|
GITHUB_TOKEN=ghp_xxxxx
|
|
142
|
+
NOTION_TOKEN=ntn_xxxxx
|
|
65
143
|
```
|
|
66
144
|
|
|
67
|
-
|
|
145
|
+
Use `${VAR_NAME}` in config — resolved from `.env` + system env.
|
|
68
146
|
|
|
69
|
-
|
|
147
|
+
## CLI Reference
|
|
70
148
|
|
|
71
|
-
|
|
149
|
+
```bash
|
|
150
|
+
mcp-bridge # Start in stdio mode (default)
|
|
151
|
+
mcp-bridge --sse --port 3000 # Start as SSE server
|
|
152
|
+
mcp-bridge --http --port 3000 # Start as HTTP server
|
|
153
|
+
mcp-bridge --verbose # Info-level logs to stderr
|
|
154
|
+
mcp-bridge --debug # Full protocol logs to stderr
|
|
155
|
+
mcp-bridge --config ./my.json # Custom config file
|
|
156
|
+
|
|
157
|
+
mcp-bridge init # Create ~/.mcp-bridge/ with template
|
|
158
|
+
mcp-bridge install <server> # Install from catalog
|
|
159
|
+
mcp-bridge catalog # List available servers
|
|
160
|
+
mcp-bridge servers # List configured servers
|
|
161
|
+
mcp-bridge search <query> # Search catalog by keyword
|
|
162
|
+
mcp-bridge update [--check] # Check for / install updates
|
|
163
|
+
mcp-bridge --version # Print version
|
|
164
|
+
```
|
|
72
165
|
|
|
73
|
-
##
|
|
166
|
+
## Server Catalog
|
|
74
167
|
|
|
75
|
-
|
|
168
|
+
Built-in catalog with pre-configured servers:
|
|
76
169
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
170
|
+
| Server | Transport | Description |
|
|
171
|
+
|--------|-----------|-------------|
|
|
172
|
+
| todoist | stdio | Task management |
|
|
173
|
+
| github | stdio | Repos, issues, PRs |
|
|
174
|
+
| notion | stdio | Pages and databases |
|
|
175
|
+
| stripe | stdio | Payments and billing |
|
|
176
|
+
| linear | stdio | Project management |
|
|
177
|
+
| google-maps | stdio | Places, geocoding, directions |
|
|
178
|
+
| hetzner | stdio | Cloud infrastructure |
|
|
179
|
+
| miro | stdio | Collaborative whiteboard |
|
|
180
|
+
| wise | stdio | International payments |
|
|
181
|
+
| tavily | stdio | AI-optimized web search |
|
|
182
|
+
| apify | streamable-http | Web scraping and automation |
|
|
89
183
|
|
|
90
184
|
```bash
|
|
91
|
-
mcp-bridge
|
|
92
|
-
mcp-bridge
|
|
93
|
-
mcp-bridge
|
|
94
|
-
mcp-bridge --config ./config.json # custom config file
|
|
95
|
-
|
|
96
|
-
mcp-bridge init # create ~/.mcp-bridge/ with template
|
|
97
|
-
mcp-bridge install <server> # install from catalog
|
|
98
|
-
mcp-bridge catalog # list available servers
|
|
99
|
-
mcp-bridge servers # list configured servers
|
|
100
|
-
mcp-bridge search <query> # search catalog
|
|
101
|
-
mcp-bridge update --check # check for updates
|
|
102
|
-
mcp-bridge update # install updates
|
|
185
|
+
mcp-bridge install todoist # Interactive setup with API key prompt
|
|
186
|
+
mcp-bridge catalog # Full list
|
|
187
|
+
mcp-bridge search payments # Search by keyword
|
|
103
188
|
```
|
|
104
189
|
|
|
105
190
|
## Library Usage
|
|
106
191
|
|
|
192
|
+
Use as a dependency in your own MCP server or OpenClaw plugin:
|
|
193
|
+
|
|
107
194
|
```typescript
|
|
108
195
|
import { McpRouter, StandaloneServer, loadConfig } from "@aiwerk/mcp-bridge";
|
|
109
196
|
|
|
197
|
+
// Quick start
|
|
110
198
|
const config = loadConfig({ configPath: "./config.json" });
|
|
111
199
|
const server = new StandaloneServer(config, console);
|
|
112
200
|
await server.startStdio();
|
|
113
201
|
```
|
|
114
202
|
|
|
203
|
+
```typescript
|
|
204
|
+
// Use the router directly
|
|
205
|
+
import { McpRouter } from "@aiwerk/mcp-bridge";
|
|
206
|
+
|
|
207
|
+
const router = new McpRouter(servers, config, logger);
|
|
208
|
+
const result = await router.dispatch("todoist", "call", "find-tasks", { query: "today" });
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Architecture
|
|
212
|
+
|
|
213
|
+
```
|
|
214
|
+
┌─────────────────┐ ┌──────────────────────────────────────┐
|
|
215
|
+
│ Claude Desktop │ │ MCP Bridge │
|
|
216
|
+
│ Cursor │◄───►│ │
|
|
217
|
+
│ Windsurf │stdio│ ┌─────────┐ ┌──────────────────┐ │
|
|
218
|
+
│ OpenClaw │ │ │ Router / │ │ Backend servers: │ │
|
|
219
|
+
│ Any MCP client │ │ │ Direct │──│ • todoist (stdio) │ │
|
|
220
|
+
└─────────────────┘ │ │ mode │ │ • github (stdio) │ │
|
|
221
|
+
│ └─────────┘ │ • notion (stdio) │ │
|
|
222
|
+
│ │ • stripe (sse) │ │
|
|
223
|
+
│ └──────────────────┘ │
|
|
224
|
+
└──────────────────────────────────────┘
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Related
|
|
228
|
+
|
|
229
|
+
- **[@aiwerk/openclaw-mcp-bridge](https://github.com/AIWerk/openclaw-mcp-bridge)** — OpenClaw plugin wrapper (uses this package as core)
|
|
230
|
+
- **[MCP Specification](https://spec.modelcontextprotocol.io)** — Model Context Protocol spec
|
|
231
|
+
- **[Awesome MCP Servers](https://github.com/punkpeye/awesome-mcp-servers)** — Community server directory
|
|
232
|
+
|
|
115
233
|
## License
|
|
116
234
|
|
|
117
|
-
MIT
|
|
235
|
+
MIT — [AIWerk](https://aiwerk.ch)
|
package/dist/src/index.d.ts
CHANGED
|
@@ -13,3 +13,4 @@ export { pickRegisteredToolName } from "./tool-naming.js";
|
|
|
13
13
|
export { StandaloneServer } from "./standalone-server.js";
|
|
14
14
|
export { checkForUpdate, getUpdateNotice, runUpdate, resetNoticeFlag } from "./update-checker.js";
|
|
15
15
|
export type { UpdateInfo } from "./update-checker.js";
|
|
16
|
+
export { filterServers, buildFilteredDescription } from "./smart-filter.js";
|
package/dist/src/index.js
CHANGED
|
@@ -19,3 +19,5 @@ export { pickRegisteredToolName } from "./tool-naming.js";
|
|
|
19
19
|
export { StandaloneServer } from "./standalone-server.js";
|
|
20
20
|
// Update checker
|
|
21
21
|
export { checkForUpdate, getUpdateNotice, runUpdate, resetNoticeFlag } from "./update-checker.js";
|
|
22
|
+
// Smart filter
|
|
23
|
+
export { filterServers, buildFilteredDescription } from "./smart-filter.js";
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Filter v2 - Phase 1: Keyword-based filtering
|
|
3
|
+
* Zero external dependencies, graceful degradation
|
|
4
|
+
*/
|
|
5
|
+
import type { Logger, McpServerConfig, McpTool } from "./types.js";
|
|
6
|
+
/** Smart filter configuration for router mode. */
|
|
7
|
+
export interface SmartFilterConfig {
|
|
8
|
+
/** Master switch. When falsy, SmartFilter.filter() skips filtering and returns createUnfilteredResult(..., "disabled"). SmartFilter constructor default: false. */
enabled?: boolean;
|
|
9
|
+
/** Requested matching backend. SmartFilter constructor default: "auto". NOTE(review): none of the visible SmartFilter methods read this value — confirm it is reserved for a later phase. */
embedding?: "auto" | "ollama" | "openai" | "gemini" | "keyword";
|
|
10
|
+
/** Baseline number of scored servers kept by SmartFilter.selectServers. SmartFilter constructor default: 5. */
topServers?: number;
|
|
11
|
+
/** Upper bound used when selectServers expands the selection because the score gap is below 0.1. SmartFilter constructor default: 8. */
hardCap?: number;
|
|
12
|
+
/** Maximum number of tools returned by SmartFilter.filterTools across all selected servers. SmartFilter constructor default: 10. */
topTools?: number;
|
|
13
|
+
/** Minimum server score to pass the primary selection step in selectServers. SmartFilter constructor default: 0.01. */
serverThreshold?: number;
|
|
14
|
+
/** Minimum tool score for a tool to be kept by filterTools. SmartFilter constructor default: 0.05. */
toolThreshold?: number;
|
|
15
|
+
/** Fallback strategy; "keyword" is the only allowed value. SmartFilter constructor default: "keyword". */
fallback?: "keyword";
|
|
16
|
+
/** Server names that selectServers always returns, regardless of score. SmartFilter constructor default: []. */
alwaysInclude?: string[];
|
|
17
|
+
/** Delay in milliseconds passed to setTimeout for the fallback timer in SmartFilter.filter(). SmartFilter constructor default: 500. */
timeoutMs?: number;
|
|
18
|
+
/** When true, SmartFilter.filter() calls logTelemetry with the result and the elapsed time. SmartFilter constructor default: false. */
telemetry?: boolean;
|
|
19
|
+
}
|
|
20
|
+
/** Extended server config with optional keywords for smart filter. */
|
|
21
|
+
export interface PluginServerConfig extends McpServerConfig {
|
|
22
|
+
/** Optional extra match terms for this server; SmartFilter lowercases, trims and de-duplicates them and only considers the first 30 entries. */
keywords?: string[];
|
|
23
|
+
}
|
|
24
|
+
/** Alias for the `Logger` type imported from ./types.js; used as the logger parameter type throughout this module. */
export type OpenClawLogger = Logger;
|
|
25
|
+
/** Flattened per-server record that SmartFilter scores (built by SmartFilter.prepareFilterableServers). */
export interface FilterableServer {
|
|
26
|
+
/** Server name: the key of the entry in the `servers` record. */
name: string;
|
|
27
|
+
/** Server description from its config, or "" when the config has none. */
description: string;
|
|
28
|
+
/** Normalized keywords (lowercased, trimmed, de-duplicated). */
keywords: string[];
|
|
29
|
+
/** Tools registered for this server name in the tools map, or [] when there is no entry. */
tools: McpTool[];
|
|
30
|
+
}
|
|
31
|
+
/** Result of SmartFilter.filter(): the selected servers, the selected tools, and bookkeeping about the run. */
export interface FilterResult {
|
|
32
|
+
/** Servers that survived server-level selection (always-included servers come first). */
servers: FilterableServer[];
|
|
33
|
+
/** Tools that survived tool-level filtering, highest score first, capped at `topTools`. */
tools: Array<{
|
|
34
|
+
/** Name of the server that owns the tool. */
serverId: string;
|
|
35
|
+
/** The tool definition itself. */
tool: McpTool;
|
|
36
|
+
}>;
|
|
37
|
+
/** Diagnostic information about the filter run. */
metadata: {
|
|
38
|
+
/** Query string synthesized from the user turns and used for scoring. */
queryUsed: string;
|
|
39
|
+
/** Number of entries in the input `servers` record. */
totalServersBeforeFilter: number;
|
|
40
|
+
/** Total number of tools across all entries of the input tools map. */
totalToolsBeforeFilter: number;
|
|
41
|
+
/** "keyword" when keyword filtering ran (or was attempted); "disabled" when the filter is switched off. */
filterMode: "keyword" | "disabled";
|
|
42
|
+
/** Set by SmartFilter.filter(): true when the timeout timer fired during the run. */
timeoutOccurred: boolean;
|
|
43
|
+
/** Relative gap between the two best server scores, (top - second) / top; only set on the filtered path. */
confidenceScore?: number;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/** One user message as consumed by SmartFilter.filter(). */
export interface UserTurn {
|
|
47
|
+
/** Raw message text; SmartFilter trims and cleans it when synthesizing the query. */
content: string;
|
|
48
|
+
/** Time of the message. NOTE(review): not read by the visible SmartFilter methods; unit (ms vs s) is not established here — confirm with callers. */
timestamp: number;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Smart Filter implementation - Phase 1
|
|
52
|
+
*/
|
|
53
|
+
export declare class SmartFilter {
|
|
54
|
+
/** Effective configuration: caller options with the constructor defaults applied. */
private config;
|
|
55
|
+
/** Logger used for warn/debug output prefixed with "[smart-filter]". */
private logger;
|
|
56
|
+
/** Stores the logger and fills every missing (null/undefined) option with its default. */
constructor(config: SmartFilterConfig, logger: OpenClawLogger);
|
|
57
|
+
/**
|
|
58
|
+
* Main filter entry point
|
|
59
|
+
*/
|
|
60
|
+
filter(servers: Record<string, PluginServerConfig>, allTools: Map<string, McpTool[]>, userTurns: UserTurn[]): Promise<FilterResult>;
|
|
61
|
+
/** Runs the pipeline: synthesize query, prepare servers, score and select servers, then filter tools. */
private performFilter;
|
|
62
|
+
/**
|
|
63
|
+
* Extract meaningful intent from last 1-3 user turns
|
|
64
|
+
*/
|
|
65
|
+
private synthesizeQuery;
|
|
66
|
+
/** Strips bracketed spans, quote/list markers and trailing politeness; returns "" for pure confirmations or punctuation. */
private extractMeaningfulContent;
|
|
67
|
+
/** Converts the servers record plus the tools map into FilterableServer entries. */
private prepareFilterableServers;
|
|
68
|
+
/** Lowercases, trims, drops empty and duplicate keywords; only the first 30 inputs are considered. */
private normalizeKeywords;
|
|
69
|
+
/**
|
|
70
|
+
* Score servers using weighted overlap scoring
|
|
71
|
+
*/
|
|
72
|
+
private scoreServers;
|
|
73
|
+
/** Lowercases text, replaces every character that is neither \w nor whitespace with a space, and splits on whitespace. */
private tokenize;
|
|
74
|
+
/** Combines description, keyword, synonym and substring matches into one score divided by the number of query words. */
private calculateServerScore;
|
|
75
|
+
/** Counts how many built-in synonyms of a query word appear among the server's words. */
private getSemanticScore;
|
|
76
|
+
/** Counts the entries of the first word list that are present in the second. */
private countOverlap;
|
|
77
|
+
/**
|
|
78
|
+
* Select servers using dynamic topServers with confidence-based expansion
|
|
79
|
+
*/
|
|
80
|
+
private selectServers;
|
|
81
|
+
/**
|
|
82
|
+
* Filter tools within selected servers
|
|
83
|
+
*/
|
|
84
|
+
private filterTools;
|
|
85
|
+
/** Scores one tool from query-word overlap with its name and description. */
private calculateToolScore;
|
|
86
|
+
/** Returns the relative gap between the two highest server scores (1.0 with fewer than two servers, 0 when the top score is 0). */
private calculateConfidenceScore;
|
|
87
|
+
/** Builds the fallback result used on the disabled, timeout, error and no-query paths. NOTE(review): implementation not visible in this view. */
private createUnfilteredResult;
|
|
88
|
+
/** Called by filter() with the result and elapsed milliseconds when `telemetry` is enabled. NOTE(review): implementation not visible in this view. */
private logTelemetry;
|
|
89
|
+
}
|
|
90
|
+
/** Fully populated configuration (every SmartFilterConfig field present). NOTE(review): the concrete values are defined in the implementation and are not visible in this declaration. */
export declare const DEFAULTS: Required<SmartFilterConfig>;
|
|
91
|
+
/** Lowercase, split on whitespace + punctuation, preserve numbers, drop empties. */
|
|
92
|
+
export declare function tokenize(text: string): string[];
|
|
93
|
+
/** Normalize keywords: lowercase, trim, dedup, strip empties, cap at MAX_KEYWORDS. */
|
|
94
|
+
export declare function validateKeywords(raw: string[]): string[];
|
|
95
|
+
/**
|
|
96
|
+
* Extract a meaningful intent string from the last 1-3 user turns.
|
|
97
|
+
* Returns null if no meaningful query can be extracted.
|
|
98
|
+
*/
|
|
99
|
+
export declare function synthesizeQuery(userTurns: string[]): string | null;
|
|
100
|
+
/** Pairing of a server name with its relevance score, as returned by scoreAllServers and consumed by selectTopServers. */
export interface ServerScore {
|
|
101
|
+
/** Server name. */
name: string;
|
|
102
|
+
/** Relevance score for the current query; higher is more relevant. */
score: number;
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Score a single server against a query using weighted word overlap.
|
|
106
|
+
* desc_matches * 1.0 + kw_only_matches * 0.5, normalized by query length.
|
|
107
|
+
*/
|
|
108
|
+
export declare function scoreServer(queryTokens: string[], serverName: string, description: string, keywords: string[]): number;
|
|
109
|
+
/** Score all servers, return sorted highest-first. */
|
|
110
|
+
export declare function scoreAllServers(queryTokens: string[], servers: Record<string, PluginServerConfig>): ServerScore[];
|
|
111
|
+
/**
|
|
112
|
+
* Select top servers with dynamic expansion toward hardCap.
|
|
113
|
+
* If top score < threshold AND gap small → show all (true uncertainty).
|
|
114
|
+
*/
|
|
115
|
+
export declare function selectTopServers(scores: ServerScore[], topServers: number, hardCap: number, threshold: number, alwaysInclude: string[]): string[];
|
|
116
|
+
/** Return value of filterServers(). */
export interface SmartFilterResult {
|
|
117
|
+
/** Names of the servers to include. */
filteredServers: string[];
|
|
118
|
+
/** NOTE(review): presumably the names of all configured servers before filtering — confirm against the implementation. */
allServers: string[];
|
|
119
|
+
/** Query used for scoring, or null when none was available. */
query: string | null;
|
|
120
|
+
/** Per-server scores. NOTE(review): presumably empty when no scoring ran — confirm. */
scores: ServerScore[];
|
|
121
|
+
/** Why this result was produced: a real filtering pass, or one of the fallback paths. */
reason: "filtered" | "no-query" | "timeout" | "error" | "disabled";
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Run the smart filter. Returns the list of server names to include.
|
|
125
|
+
* Guarantees: never throws, never blocks longer than timeoutMs.
|
|
126
|
+
*/
|
|
127
|
+
export declare function filterServers(servers: Record<string, PluginServerConfig>, userTurns: string[], config: SmartFilterConfig, logger?: OpenClawLogger): SmartFilterResult;
|
|
128
|
+
/** Build a filtered router tool description string. */
|
|
129
|
+
export declare function buildFilteredDescription(allServers: Record<string, PluginServerConfig>, filteredNames: string[]): string;
|
|
@@ -0,0 +1,559 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Filter v2 - Phase 1: Keyword-based filtering
|
|
3
|
+
* Zero external dependencies, graceful degradation
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Smart Filter implementation - Phase 1
|
|
7
|
+
*/
|
|
8
|
+
export class SmartFilter {
|
|
9
|
+
config;
|
|
10
|
+
logger;
|
|
11
|
+
constructor(config, logger) {
|
|
12
|
+
// Apply defaults
|
|
13
|
+
this.config = {
|
|
14
|
+
enabled: config.enabled ?? false,
|
|
15
|
+
embedding: config.embedding ?? "auto",
|
|
16
|
+
topServers: config.topServers ?? 5,
|
|
17
|
+
hardCap: config.hardCap ?? 8,
|
|
18
|
+
topTools: config.topTools ?? 10,
|
|
19
|
+
serverThreshold: config.serverThreshold ?? 0.01, // Very low threshold for maximum recall
|
|
20
|
+
toolThreshold: config.toolThreshold ?? 0.05, // Much lower threshold for better recall
|
|
21
|
+
fallback: config.fallback ?? "keyword",
|
|
22
|
+
alwaysInclude: config.alwaysInclude ?? [],
|
|
23
|
+
timeoutMs: config.timeoutMs ?? 500,
|
|
24
|
+
telemetry: config.telemetry ?? false,
|
|
25
|
+
};
|
|
26
|
+
this.logger = logger;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Main filter entry point
|
|
30
|
+
*/
|
|
31
|
+
async filter(servers, allTools, userTurns) {
|
|
32
|
+
if (!this.config.enabled) {
|
|
33
|
+
return this.createUnfilteredResult(servers, allTools, "disabled");
|
|
34
|
+
}
|
|
35
|
+
const startTime = Date.now();
|
|
36
|
+
let timeoutOccurred = false;
|
|
37
|
+
try {
|
|
38
|
+
// Set up timeout
|
|
39
|
+
const timeoutPromise = new Promise((resolve) => {
|
|
40
|
+
setTimeout(() => {
|
|
41
|
+
timeoutOccurred = true;
|
|
42
|
+
this.logger.warn(`[smart-filter] Filter timeout after ${this.config.timeoutMs}ms, falling back to show all`);
|
|
43
|
+
resolve(this.createUnfilteredResult(servers, allTools, "keyword"));
|
|
44
|
+
}, this.config.timeoutMs);
|
|
45
|
+
});
|
|
46
|
+
const filterPromise = this.performFilter(servers, allTools, userTurns);
|
|
47
|
+
const result = await Promise.race([filterPromise, timeoutPromise]);
|
|
48
|
+
result.metadata.timeoutOccurred = timeoutOccurred;
|
|
49
|
+
const duration = Date.now() - startTime;
|
|
50
|
+
if (this.config.telemetry) {
|
|
51
|
+
this.logTelemetry(result, duration);
|
|
52
|
+
}
|
|
53
|
+
return result;
|
|
54
|
+
}
|
|
55
|
+
catch (error) {
|
|
56
|
+
this.logger.warn(`[smart-filter] Filter failed: ${error instanceof Error ? error.message : String(error)}, falling back to show all`);
|
|
57
|
+
const result = this.createUnfilteredResult(servers, allTools, "keyword");
|
|
58
|
+
result.metadata.timeoutOccurred = timeoutOccurred;
|
|
59
|
+
return result;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
async performFilter(servers, allTools, userTurns) {
|
|
63
|
+
// Step 1: Query synthesis
|
|
64
|
+
const query = this.synthesizeQuery(userTurns);
|
|
65
|
+
if (!query) {
|
|
66
|
+
this.logger.debug("[smart-filter] No meaningful query found, showing all servers");
|
|
67
|
+
return this.createUnfilteredResult(servers, allTools, "keyword", "");
|
|
68
|
+
}
|
|
69
|
+
// Step 2: Prepare filterable servers
|
|
70
|
+
const filterableServers = this.prepareFilterableServers(servers, allTools);
|
|
71
|
+
// Step 3: Level 1 - Server filtering
|
|
72
|
+
const serverScores = this.scoreServers(query, filterableServers);
|
|
73
|
+
const selectedServers = this.selectServers(serverScores, filterableServers);
|
|
74
|
+
// Step 4: Level 2 - Tool filtering
|
|
75
|
+
const toolResults = this.filterTools(query, selectedServers);
|
|
76
|
+
return {
|
|
77
|
+
servers: selectedServers.map(s => s.server),
|
|
78
|
+
tools: toolResults,
|
|
79
|
+
metadata: {
|
|
80
|
+
queryUsed: query,
|
|
81
|
+
totalServersBeforeFilter: Object.keys(servers).length,
|
|
82
|
+
totalToolsBeforeFilter: Array.from(allTools.values()).flat().length,
|
|
83
|
+
filterMode: "keyword",
|
|
84
|
+
timeoutOccurred: false,
|
|
85
|
+
confidenceScore: this.calculateConfidenceScore(serverScores),
|
|
86
|
+
},
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Extract meaningful intent from last 1-3 user turns
|
|
91
|
+
*/
|
|
92
|
+
synthesizeQuery(userTurns) {
|
|
93
|
+
if (!userTurns || userTurns.length === 0) {
|
|
94
|
+
return "";
|
|
95
|
+
}
|
|
96
|
+
// Take last 1-3 turns, newest first
|
|
97
|
+
const recentTurns = userTurns
|
|
98
|
+
.slice(-3)
|
|
99
|
+
.reverse()
|
|
100
|
+
.map(turn => turn.content.trim());
|
|
101
|
+
for (const content of recentTurns) {
|
|
102
|
+
const cleanedQuery = this.extractMeaningfulContent(content);
|
|
103
|
+
if (cleanedQuery.length >= 3) {
|
|
104
|
+
return cleanedQuery;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
// If all recent turns are too short, try combining them
|
|
108
|
+
const combined = recentTurns
|
|
109
|
+
.map(content => this.extractMeaningfulContent(content))
|
|
110
|
+
.filter(content => content.length > 0)
|
|
111
|
+
.join(" ")
|
|
112
|
+
.trim();
|
|
113
|
+
return combined.length >= 3 ? combined : "";
|
|
114
|
+
}
|
|
115
|
+
extractMeaningfulContent(content) {
|
|
116
|
+
// Remove metadata patterns
|
|
117
|
+
const cleaned = content
|
|
118
|
+
.replace(/\[.*?\]/g, "") // [timestamps], [commands]
|
|
119
|
+
.replace(/^\s*[>]*\s*/gm, "") // quote markers
|
|
120
|
+
.replace(/^\s*[-*•]\s*/gm, "") // list markers
|
|
121
|
+
.trim();
|
|
122
|
+
// Filter out noise words/confirmations
|
|
123
|
+
const noisePatterns = [
|
|
124
|
+
/^(yes|no|ok|okay|sure|thanks?|thank you)\.?$/i,
|
|
125
|
+
/^(do it|go ahead|proceed)\.?$/i,
|
|
126
|
+
/^(yes,?\s+(do it|go ahead|proceed))\.?$/i,
|
|
127
|
+
/^\?+$/,
|
|
128
|
+
/^\.+$/,
|
|
129
|
+
/^!+$/,
|
|
130
|
+
];
|
|
131
|
+
if (noisePatterns.some(pattern => pattern.test(cleaned))) {
|
|
132
|
+
return "";
|
|
133
|
+
}
|
|
134
|
+
// Remove trailing "please" and other politeness words
|
|
135
|
+
const withoutPoliteness = cleaned
|
|
136
|
+
.replace(/\s+please\.?$/i, "")
|
|
137
|
+
.replace(/\s+thanks?\.?$/i, "")
|
|
138
|
+
.trim();
|
|
139
|
+
return withoutPoliteness;
|
|
140
|
+
}
|
|
141
|
+
prepareFilterableServers(servers, allTools) {
|
|
142
|
+
return Object.entries(servers).map(([name, config]) => ({
|
|
143
|
+
name,
|
|
144
|
+
description: config.description || "",
|
|
145
|
+
keywords: this.normalizeKeywords(config.keywords || []),
|
|
146
|
+
tools: allTools.get(name) || [],
|
|
147
|
+
}));
|
|
148
|
+
}
|
|
149
|
+
normalizeKeywords(keywords) {
|
|
150
|
+
return keywords
|
|
151
|
+
.slice(0, 30) // Max 30 keywords
|
|
152
|
+
.map(kw => kw.toLowerCase().trim())
|
|
153
|
+
.filter(kw => kw.length > 0)
|
|
154
|
+
.filter((kw, index, arr) => arr.indexOf(kw) === index); // Deduplicate
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Score servers using weighted overlap scoring
|
|
158
|
+
*/
|
|
159
|
+
scoreServers(query, servers) {
|
|
160
|
+
const queryWords = this.tokenize(query.toLowerCase());
|
|
161
|
+
return servers.map(server => ({
|
|
162
|
+
server,
|
|
163
|
+
score: this.calculateServerScore(queryWords, server),
|
|
164
|
+
}));
|
|
165
|
+
}
|
|
166
|
+
tokenize(text) {
|
|
167
|
+
return text
|
|
168
|
+
.toLowerCase()
|
|
169
|
+
.replace(/[^\w\s]/g, " ")
|
|
170
|
+
.split(/\s+/)
|
|
171
|
+
.filter(word => word.length > 0);
|
|
172
|
+
}
|
|
173
|
+
calculateServerScore(queryWords, server) {
|
|
174
|
+
if (queryWords.length === 0)
|
|
175
|
+
return 0;
|
|
176
|
+
const descriptionWords = this.tokenize(server.description);
|
|
177
|
+
const keywordWords = server.keywords;
|
|
178
|
+
const allServerWords = [...descriptionWords, ...keywordWords];
|
|
179
|
+
// Calculate overlaps
|
|
180
|
+
const descMatches = this.countOverlap(queryWords, descriptionWords);
|
|
181
|
+
const keywordOnlyMatches = this.countOverlap(queryWords, keywordWords) - descMatches;
|
|
182
|
+
// Add basic synonym matching for common terms
|
|
183
|
+
let semanticMatches = 0;
|
|
184
|
+
for (const queryWord of queryWords) {
|
|
185
|
+
semanticMatches += this.getSemanticScore(queryWord, allServerWords);
|
|
186
|
+
}
|
|
187
|
+
// Also check for partial/substring matches for better recall
|
|
188
|
+
let partialMatches = 0;
|
|
189
|
+
for (const queryWord of queryWords) {
|
|
190
|
+
for (const serverWord of allServerWords) {
|
|
191
|
+
if (queryWord.length > 3 && serverWord.includes(queryWord)) {
|
|
192
|
+
partialMatches += 0.3; // Partial match gets partial credit
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
// Weighted scoring: description 1.0x, keywords 0.7x, semantic 0.5x, partial matches 0.3x
|
|
197
|
+
const score = (descMatches * 1.0 + Math.max(0, keywordOnlyMatches) * 0.7 + semanticMatches * 0.5 + partialMatches) / queryWords.length;
|
|
198
|
+
return score;
|
|
199
|
+
}
|
|
200
|
+
getSemanticScore(queryWord, serverWords) {
|
|
201
|
+
// Comprehensive synonym/semantic matching
|
|
202
|
+
const synonymMap = {
|
|
203
|
+
// Finance/payment terms
|
|
204
|
+
money: ["payment", "transfer", "currency", "invoice", "billing", "charge", "account", "balance"],
|
|
205
|
+
payment: ["money", "transfer", "invoice", "billing", "charge", "process"],
|
|
206
|
+
send: ["transfer", "payment", "international"],
|
|
207
|
+
transfer: ["send", "payment", "money", "international"],
|
|
208
|
+
invoice: ["bill", "charge", "payment", "billing", "customer"],
|
|
209
|
+
account: ["balance", "money", "payment"],
|
|
210
|
+
balance: ["account", "money"],
|
|
211
|
+
international: ["transfer", "money", "payment"],
|
|
212
|
+
// Task/productivity terms
|
|
213
|
+
task: ["todo", "reminder", "project", "management", "productivity"],
|
|
214
|
+
todo: ["task", "reminder", "management"],
|
|
215
|
+
create: ["add", "new", "task", "issue"],
|
|
216
|
+
project: ["task", "management", "board", "productivity"],
|
|
217
|
+
manage: ["task", "project", "productivity"],
|
|
218
|
+
schedule: ["meeting", "calendar", "appointment"],
|
|
219
|
+
meeting: ["schedule", "calendar"],
|
|
220
|
+
// Development terms
|
|
221
|
+
code: ["repo", "repository", "commit", "branch", "github"],
|
|
222
|
+
issue: ["bug", "ticket", "github", "repository"],
|
|
223
|
+
bug: ["issue", "github"],
|
|
224
|
+
repository: ["repo", "code", "github"],
|
|
225
|
+
commit: ["code", "repository", "github"],
|
|
226
|
+
// Location/maps terms
|
|
227
|
+
location: ["map", "address", "directions", "geocode", "places"],
|
|
228
|
+
directions: ["map", "route", "location"],
|
|
229
|
+
address: ["location", "geocode"],
|
|
230
|
+
geocode: ["address", "location"],
|
|
231
|
+
restaurant: ["location", "places", "map"],
|
|
232
|
+
nearby: ["location", "map"],
|
|
233
|
+
// Storage/document terms
|
|
234
|
+
upload: ["store", "save", "file", "document"],
|
|
235
|
+
document: ["file", "note", "upload", "storage"],
|
|
236
|
+
store: ["save", "upload", "note"],
|
|
237
|
+
notes: ["document", "store"],
|
|
238
|
+
// Infrastructure terms
|
|
239
|
+
deploy: ["infrastructure", "cloud", "server"],
|
|
240
|
+
cloud: ["infrastructure", "deploy"],
|
|
241
|
+
server: ["infrastructure", "monitoring"],
|
|
242
|
+
infrastructure: ["cloud", "server", "deploy"],
|
|
243
|
+
monitoring: ["server", "infrastructure"],
|
|
244
|
+
// Collaboration terms
|
|
245
|
+
whiteboard: ["collaboration", "brainstorming"],
|
|
246
|
+
brainstorming: ["whiteboard", "collaboration"],
|
|
247
|
+
collaboration: ["whiteboard", "design"],
|
|
248
|
+
// Search terms
|
|
249
|
+
search: ["find", "information", "papers"],
|
|
250
|
+
find: ["search", "information"],
|
|
251
|
+
information: ["search", "find"],
|
|
252
|
+
// Web scraping terms
|
|
253
|
+
analyze: ["data", "extract", "website"],
|
|
254
|
+
extract: ["data", "scraping", "website"],
|
|
255
|
+
website: ["scraping", "analyze", "extract"],
|
|
256
|
+
data: ["extract", "analyze", "scraping"],
|
|
257
|
+
traffic: ["website", "analyze"],
|
|
258
|
+
};
|
|
259
|
+
const synonyms = synonymMap[queryWord.toLowerCase()] || [];
|
|
260
|
+
let matches = 0;
|
|
261
|
+
for (const synonym of synonyms) {
|
|
262
|
+
if (serverWords.includes(synonym)) {
|
|
263
|
+
matches += 1;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
return matches;
|
|
267
|
+
}
|
|
268
|
+
countOverlap(words1, words2) {
|
|
269
|
+
const set2 = new Set(words2);
|
|
270
|
+
return words1.filter(word => set2.has(word)).length;
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Select servers using dynamic topServers with confidence-based expansion
|
|
274
|
+
*/
|
|
275
|
+
selectServers(serverScores, allServers) {
|
|
276
|
+
// Include always-included servers first
|
|
277
|
+
const alwaysIncluded = allServers
|
|
278
|
+
.filter(s => this.config.alwaysInclude.includes(s.name))
|
|
279
|
+
.map(server => ({ server, score: 1.0 }));
|
|
280
|
+
// Sort all servers by score
|
|
281
|
+
const allScoredServers = serverScores
|
|
282
|
+
.filter(({ server }) => !this.config.alwaysInclude.includes(server.name))
|
|
283
|
+
.sort((a, b) => b.score - a.score);
|
|
284
|
+
// Primary filter: servers that meet threshold
|
|
285
|
+
const thresholdServers = allScoredServers.filter(({ score }) => score >= this.config.serverThreshold);
|
|
286
|
+
// Fallback: if too few servers pass threshold, include more based on ranking
|
|
287
|
+
let scoredServers = thresholdServers;
|
|
288
|
+
if (thresholdServers.length < 2) {
|
|
289
|
+
// Take at least top 3 servers regardless of threshold for better recall
|
|
290
|
+
scoredServers = allScoredServers.slice(0, Math.max(3, this.config.topServers));
|
|
291
|
+
this.logger.debug(`[smart-filter] Only ${thresholdServers.length} servers met threshold, expanding to top ${scoredServers.length}`);
|
|
292
|
+
}
|
|
293
|
+
// Dynamic topServers based on confidence
|
|
294
|
+
let numServers = this.config.topServers;
|
|
295
|
+
if (scoredServers.length >= 2) {
|
|
296
|
+
const topScore = scoredServers[0].score;
|
|
297
|
+
const cutoffScore = scoredServers[Math.min(this.config.topServers - 1, scoredServers.length - 1)].score;
|
|
298
|
+
const gap = topScore - cutoffScore;
|
|
299
|
+
// If gap is small (uncertain), expand toward hard cap
|
|
300
|
+
if (gap < 0.1 && scoredServers.length > numServers) {
|
|
301
|
+
numServers = Math.min(this.config.hardCap, scoredServers.length);
|
|
302
|
+
this.logger.debug(`[smart-filter] Low confidence (gap: ${gap.toFixed(3)}), expanding to ${numServers} servers`);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
const selectedScored = scoredServers.slice(0, numServers);
|
|
306
|
+
return [...alwaysIncluded, ...selectedScored];
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Filter tools within selected servers
|
|
310
|
+
*/
|
|
311
|
+
filterTools(query, selectedServers) {
|
|
312
|
+
const queryWords = this.tokenize(query);
|
|
313
|
+
const allTools = [];
|
|
314
|
+
for (const { server } of selectedServers) {
|
|
315
|
+
for (const tool of server.tools) {
|
|
316
|
+
const score = this.calculateToolScore(queryWords, tool);
|
|
317
|
+
if (score >= this.config.toolThreshold) {
|
|
318
|
+
allTools.push({ serverId: server.name, tool, score });
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
// Sort by score and take top N
|
|
323
|
+
return allTools
|
|
324
|
+
.sort((a, b) => b.score - a.score)
|
|
325
|
+
.slice(0, this.config.topTools)
|
|
326
|
+
.map(({ serverId, tool }) => ({ serverId, tool }));
|
|
327
|
+
}
|
|
328
|
+
calculateToolScore(queryWords, tool) {
|
|
329
|
+
if (queryWords.length === 0)
|
|
330
|
+
return 0;
|
|
331
|
+
const nameWords = this.tokenize(tool.name);
|
|
332
|
+
const descWords = this.tokenize(tool.description || "");
|
|
333
|
+
const nameMatches = this.countOverlap(queryWords, nameWords);
|
|
334
|
+
const descMatches = this.countOverlap(queryWords, descWords) - this.countOverlap(queryWords, nameWords);
|
|
335
|
+
// Weighted: description 1.0x, name 0.5x (name is less descriptive usually)
|
|
336
|
+
const score = (descMatches * 1.0 + nameMatches * 0.5) / queryWords.length;
|
|
337
|
+
return score;
|
|
338
|
+
}
|
|
339
|
+
calculateConfidenceScore(serverScores) {
|
|
340
|
+
if (serverScores.length < 2)
|
|
341
|
+
return 1.0;
|
|
342
|
+
const scores = serverScores.map(s => s.score).sort((a, b) => b - a);
|
|
343
|
+
const topScore = scores[0];
|
|
344
|
+
const secondScore = scores[1];
|
|
345
|
+
// Confidence based on gap between top scores
|
|
346
|
+
if (topScore === 0)
|
|
347
|
+
return 0;
|
|
348
|
+
return Math.min(1.0, (topScore - secondScore) / topScore);
|
|
349
|
+
}
|
|
350
|
+
createUnfilteredResult(servers, allTools, filterMode, queryUsed = "") {
|
|
351
|
+
const filterableServers = this.prepareFilterableServers(servers, allTools);
|
|
352
|
+
const tools = Array.from(allTools.entries()).flatMap(([serverId, tools]) => tools.map(tool => ({ serverId, tool })));
|
|
353
|
+
return {
|
|
354
|
+
servers: filterableServers,
|
|
355
|
+
tools,
|
|
356
|
+
metadata: {
|
|
357
|
+
queryUsed,
|
|
358
|
+
totalServersBeforeFilter: Object.keys(servers).length,
|
|
359
|
+
totalToolsBeforeFilter: tools.length,
|
|
360
|
+
filterMode,
|
|
361
|
+
timeoutOccurred: false,
|
|
362
|
+
},
|
|
363
|
+
};
|
|
364
|
+
}
|
|
365
|
+
logTelemetry(result, durationMs) {
|
|
366
|
+
const telemetry = {
|
|
367
|
+
timestamp: new Date().toISOString(),
|
|
368
|
+
query: result.metadata.queryUsed,
|
|
369
|
+
serversReturned: result.servers.length,
|
|
370
|
+
toolsReturned: result.tools.length,
|
|
371
|
+
totalServersBefore: result.metadata.totalServersBeforeFilter,
|
|
372
|
+
totalToolsBefore: result.metadata.totalToolsBeforeFilter,
|
|
373
|
+
filterMode: result.metadata.filterMode,
|
|
374
|
+
durationMs,
|
|
375
|
+
confidenceScore: result.metadata.confidenceScore,
|
|
376
|
+
timeoutOccurred: result.metadata.timeoutOccurred,
|
|
377
|
+
};
|
|
378
|
+
this.logger.debug("[smart-filter] Telemetry:", JSON.stringify(telemetry));
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
// ── Standalone utility exports (for testing and external use) ────────────────

/** Maximum number of keywords kept by `validateKeywords`. */
const MAX_KEYWORDS = 30;

/**
 * Conversational filler tokens. `synthesizeQuery` drops these before deciding
 * whether a user turn carries enough content to act as a query.
 */
const NOISE_WORDS = new Set([
    // Affirmations / negations
    "yes", "no", "ok", "okay", "sure", "yep", "nope", "yeah", "nah",
    // Politeness and greetings
    "do", "it", "please", "thanks", "thank", "you", "hi", "hello",
    // Acknowledgements and hesitations
    "hey", "right", "alright", "fine", "got", "hmm", "hm",
]);

/**
 * Default smart-filter settings. `filterServers` spreads the caller's config on
 * top of these, so any key the caller provides wins.
 */
export const DEFAULTS = {
    // Filtering is opt-in.
    enabled: false,
    // NOTE(review): `embedding` and `fallback` are not read anywhere in the visible
    // code — presumably strategy selectors; confirm against the class implementation.
    embedding: "keyword",
    // Baseline number of servers to select, and the ceiling for dynamic expansion.
    topServers: 5,
    hardCap: 8,
    // Maximum number of tools kept after tool filtering.
    topTools: 10,
    // Minimum scores for a server / tool to qualify.
    serverThreshold: 0.15,
    toolThreshold: 0.1,
    fallback: "keyword",
    // Server names that are always part of the selection.
    alwaysInclude: [],
    // Time budget (ms) checked between filtering stages.
    timeoutMs: 500,
    telemetry: false,
};
|
|
401
|
+
/**
 * Break text into lowercase tokens.
 *
 * A token is a maximal run of characters that are neither whitespace nor
 * Unicode punctuation, so digits are kept and empty tokens never occur.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
export function tokenize(text) {
    const lowered = text.toLowerCase();
    // A global match returns null when nothing matches; normalize that to [].
    return lowered.match(/[^\s\p{P}]+/gu) ?? [];
}
|
|
408
|
+
/**
 * Normalize a keyword list: lowercase, trim, drop empties and duplicates, and
 * keep at most MAX_KEYWORDS entries in first-seen order.
 *
 * Entries that are not strings are skipped and a nullish list is treated as
 * empty, so one malformed config value cannot make server scoring throw.
 *
 * @param {Iterable<unknown> | null | undefined} raw - Raw keywords.
 * @returns {string[]} Normalized, de-duplicated keywords.
 */
export function validateKeywords(raw) {
    // A Set iterates in insertion order, so it doubles as the output list.
    const seen = new Set();
    for (const kw of raw ?? []) {
        if (typeof kw !== "string") {
            continue;
        }
        const normalized = kw.toLowerCase().trim();
        if (normalized.length === 0 || seen.has(normalized)) {
            continue;
        }
        seen.add(normalized);
        if (seen.size >= MAX_KEYWORDS) {
            break;
        }
    }
    return [...seen];
}
|
|
423
|
+
/**
 * Derive a query string from the most recent user turns.
 *
 * The last three turns are inspected newest-first. The first turn that still
 * has at least two tokens after noise words are removed wins, and its remaining
 * tokens are joined with single spaces.
 *
 * @param {string[]} userTurns - User messages, oldest first.
 * @returns {string | null} The synthesized query, or null when no inspected
 *   turn is meaningful enough.
 */
export function synthesizeQuery(userTurns) {
    const window = userTurns.slice(-3);
    for (let index = window.length - 1; index >= 0; index--) {
        const meaningful = [];
        for (const token of tokenize(window[index])) {
            if (!NOISE_WORDS.has(token)) {
                meaningful.push(token);
            }
        }
        if (meaningful.length >= 2) {
            return meaningful.join(" ");
        }
    }
    return null;
}
|
|
437
|
+
/**
 * Compute the relevance of one server for a tokenized query.
 *
 * Each query token found in the description or the server name is worth 1.0;
 * a token found only among the keywords is worth 0.5. The total is divided by
 * the number of query tokens.
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {string} serverName - Server name (tokenized and treated like description text).
 * @param {string} description - Server description.
 * @param {string[]} keywords - Keywords; normalized via `validateKeywords`.
 * @returns {number} Score; 0 for an empty query.
 */
export function scoreServer(queryTokens, serverName, description, keywords) {
    const total = queryTokens.length;
    if (total === 0) {
        return 0;
    }
    const primary = new Set([...tokenize(description), ...tokenize(serverName)]);
    const secondary = new Set(validateKeywords(keywords).flatMap((kw) => tokenize(kw)));
    let weighted = 0;
    for (const token of queryTokens) {
        if (primary.has(token)) {
            weighted += 1.0;
        } else if (secondary.has(token)) {
            // Keyword-only hit: counted at half weight.
            weighted += 0.5;
        }
    }
    return weighted / total;
}
|
|
460
|
+
/**
 * Score every configured server against the query.
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {Record<string, {description?: string, keywords?: string[]}>} servers
 *   Server configs keyed by name; a missing description or keyword list is
 *   treated as empty.
 * @returns {Array<{name: string, score: number}>} Scores ordered highest-first.
 */
export function scoreAllServers(queryTokens, servers) {
    return Object.entries(servers)
        .map(([name, cfg]) => ({
            name,
            score: scoreServer(queryTokens, name, cfg.description ?? "", cfg.keywords ?? []),
        }))
        .sort((left, right) => right.score - left.score);
}
|
|
468
|
+
/**
 * Choose which servers to expose, given scores ordered highest-first.
 *
 * - No scores: nothing is selected.
 * - Best score below `threshold` and within 0.05 of the score at the
 *   `topServers` cutoff: every server is returned (nothing stands out).
 * - Otherwise the first `topServers` entries are taken, and the selection grows
 *   toward `hardCap` while the next entry scores at least 80% of the cutoff
 *   entry and meets `threshold`. Entries below `threshold` are then dropped,
 *   except the best one, which is always kept.
 * - Names in `alwaysInclude` are appended regardless of score.
 *
 * @param {Array<{name: string, score: number}>} scores - Scores, highest-first.
 * @param {number} topServers - Baseline number of servers to take.
 * @param {number} hardCap - Ceiling for the dynamic expansion.
 * @param {number} threshold - Minimum qualifying score.
 * @param {Iterable<string>} alwaysInclude - Names added unconditionally.
 * @returns {string[]} Selected server names, without duplicates.
 */
export function selectTopServers(scores, topServers, hardCap, threshold, alwaysInclude) {
    const total = scores.length;
    if (total === 0) {
        return [];
    }

    const best = scores[0].score;
    if (total > 1 && best < threshold) {
        const reference = scores[Math.min(total - 1, topServers - 1)];
        if (best - reference.score < 0.05) {
            return scores.map((entry) => entry.name);
        }
    }

    let count = Math.min(topServers, total);
    if (count < total && count < hardCap) {
        const floor = scores[count - 1].score * 0.8;
        const ceiling = Math.min(hardCap, total);
        while (count < ceiling && scores[count].score >= floor && scores[count].score >= threshold) {
            count++;
        }
    }

    const picked = new Set();
    for (let rank = 0; rank < count && rank < total; rank++) {
        const entry = scores[rank];
        // The top-ranked server is kept even when it misses the threshold.
        if (rank === 0 || entry.score >= threshold) {
            picked.add(entry.name);
        }
    }
    for (const name of alwaysInclude) {
        picked.add(name);
    }
    return Array.from(picked);
}
|
|
504
|
+
/**
 * Run the smart filter and report which servers to include.
 *
 * Designed never to throw: nullish `servers` or `config` are tolerated, and any
 * error raised while filtering is logged and turned into a "show all" result.
 * The time budget is best-effort: elapsed time is compared with `timeoutMs`
 * after query synthesis and again after scoring, and a breach falls back to
 * showing all servers. The stages are synchronous, so a stage that is already
 * running is not interrupted.
 *
 * @param {Record<string, object> | null | undefined} servers - Server configs keyed by name.
 * @param {string[]} userTurns - User messages, oldest first.
 * @param {object | null | undefined} config - Filter settings; merged over DEFAULTS.
 * @param {{warn?: Function, error?: Function}} [logger] - Optional logger.
 * @returns {{filteredServers: string[], allServers: string[], query: string | null,
 *   scores: Array<{name: string, score: number}>, reason: string}}
 *   `reason` is one of "disabled", "no-query", "timeout", "error" or "filtered".
 */
export function filterServers(servers, userTurns, config, logger) {
    // Guarded so that a missing server map cannot throw before the try block.
    const allServers = Object.keys(servers ?? {});
    const showAll = (reason, query = null) => ({
        filteredServers: allServers,
        allServers,
        query,
        scores: [],
        reason,
    });
    // A missing config is treated the same as a disabled filter.
    if (!config?.enabled) {
        return showAll("disabled");
    }
    try {
        const merged = { ...DEFAULTS, ...config };
        const startTime = Date.now();
        const query = synthesizeQuery(userTurns);
        if (!query) {
            return showAll("no-query");
        }
        if (Date.now() - startTime > merged.timeoutMs) {
            logger?.warn("[smart-filter] Timeout during query synthesis");
            return showAll("timeout", query);
        }
        const queryTokens = tokenize(query);
        if (queryTokens.length === 0) {
            return showAll("no-query");
        }
        const scores = scoreAllServers(queryTokens, servers);
        if (Date.now() - startTime > merged.timeoutMs) {
            logger?.warn("[smart-filter] Timeout during scoring");
            return showAll("timeout", query);
        }
        const filteredServers = selectTopServers(scores, merged.topServers, merged.hardCap, merged.serverThreshold, merged.alwaysInclude);
        return { filteredServers, allServers, query, scores, reason: "filtered" };
    }
    catch (err) {
        logger?.error("[smart-filter] Error during filtering, showing all servers:", err);
        return showAll("error");
    }
}
|
|
545
|
+
/**
 * Compose the router tool description for the servers that survived filtering.
 *
 * Servers are listed in the order they appear in `allServers`, each as
 * `name (description)` or just `name` when no description is configured.
 *
 * @param {Record<string, {description?: string}>} allServers - All server configs keyed by name.
 * @param {Iterable<string>} filteredNames - Names of the servers to mention.
 * @returns {string} The tool description, or a short notice when no server is listed.
 */
export function buildFilteredDescription(allServers, filteredNames) {
    const wanted = new Set(filteredNames);
    const labels = [];
    for (const [name, cfg] of Object.entries(allServers)) {
        if (!wanted.has(name)) {
            continue;
        }
        const desc = cfg.description;
        labels.push(desc ? `${name} (${desc})` : name);
    }
    const serverList = labels.join(", ");
    if (serverList) {
        return `Call any MCP server tool. Servers: ${serverList}. Use action='list' to discover tools and required parameters, action='call' to execute a tool, action='refresh' to clear cache and re-discover tools, and action='status' to check server connection states. If the user mentions a specific tool by name, the call action auto-connects and works without listing first.`;
    }
    return "Call MCP server tools. No servers matched the current context.";
}
|