mcp2cli 2.1.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp2cli-2.1.0 → mcp2cli-2.2.0}/PKG-INFO +18 -183
- {mcp2cli-2.1.0 → mcp2cli-2.2.0}/README.md +17 -182
- {mcp2cli-2.1.0 → mcp2cli-2.2.0}/pyproject.toml +1 -1
- {mcp2cli-2.1.0 → mcp2cli-2.2.0}/src/mcp2cli/__init__.py +31 -10
- {mcp2cli-2.1.0 → mcp2cli-2.2.0}/src/mcp2cli/__main__.py +0 -0
- {mcp2cli-2.1.0 → mcp2cli-2.2.0}/src/mcp2cli/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp2cli
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: Turn any MCP server or OpenAPI spec into a CLI
|
|
5
5
|
Author: Stephan Fitzpatrick
|
|
6
6
|
Author-email: Stephan Fitzpatrick <stephan@knowsuchagency.com>
|
|
@@ -68,25 +68,35 @@ mcp2cli --mcp https://mcp.example.com/sse --auth-header "x-api-key:sk-..." \
|
|
|
68
68
|
|
|
69
69
|
# Force a specific transport (skip streamable HTTP fallback dance)
|
|
70
70
|
mcp2cli --mcp https://mcp.example.com/sse --transport sse --list
|
|
71
|
+
|
|
72
|
+
# Search tools by name or description (case-insensitive substring match)
|
|
73
|
+
mcp2cli --mcp https://mcp.example.com/sse --search "task"
|
|
71
74
|
```
|
|
72
75
|
|
|
76
|
+
`--search` implies `--list` and works across all modes (`--mcp`, `--spec`, `--graphql`, `--mcp-stdio`).
|
|
77
|
+
|
|
73
78
|
### OAuth authentication
|
|
74
79
|
|
|
75
|
-
|
|
76
|
-
caching, and refresh automatically.
|
|
80
|
+
APIs that require OAuth are supported out of the box — across MCP, OpenAPI, and GraphQL modes.
|
|
81
|
+
mcp2cli handles token acquisition, caching, and refresh automatically.
|
|
77
82
|
|
|
78
83
|
```bash
|
|
79
84
|
# Authorization code + PKCE flow (opens browser for login)
|
|
80
85
|
mcp2cli --mcp https://mcp.example.com/sse --oauth --list
|
|
86
|
+
mcp2cli --spec https://api.example.com/openapi.json --oauth --list
|
|
87
|
+
mcp2cli --graphql https://api.example.com/graphql --oauth --list
|
|
81
88
|
|
|
82
89
|
# Client credentials flow (machine-to-machine, no browser)
|
|
83
|
-
mcp2cli --
|
|
90
|
+
mcp2cli --spec https://api.example.com/openapi.json \
|
|
84
91
|
--oauth-client-id "my-client-id" \
|
|
85
92
|
--oauth-client-secret "my-secret" \
|
|
86
|
-
|
|
93
|
+
list-pets
|
|
87
94
|
|
|
88
95
|
# With specific scopes
|
|
89
|
-
mcp2cli --
|
|
96
|
+
mcp2cli --graphql https://api.example.com/graphql --oauth --oauth-scope "read write" users
|
|
97
|
+
|
|
98
|
+
# Local spec file — use --base-url for OAuth discovery
|
|
99
|
+
mcp2cli --spec ./openapi.json --base-url https://api.example.com --oauth --list
|
|
90
100
|
```
|
|
91
101
|
|
|
92
102
|
Tokens are persisted in `~/.cache/mcp2cli/oauth/` so subsequent calls reuse existing tokens
|
|
@@ -170,68 +180,7 @@ mcp2cli --graphql https://api.example.com/graphql users --fields "id name email"
|
|
|
170
180
|
mcp2cli --graphql https://api.example.com/graphql --auth-header "Authorization:Bearer tok_..." users
|
|
171
181
|
```
|
|
172
182
|
|
|
173
|
-
mcp2cli introspects the endpoint, discovers queries and mutations, auto-generates selection sets, and constructs parameterized queries with proper variable declarations.
|
|
174
|
-
|
|
175
|
-
**GraphQL schema:**
|
|
176
|
-
|
|
177
|
-
```graphql
|
|
178
|
-
type Query {
|
|
179
|
-
users: [User!]!
|
|
180
|
-
user(id: ID!): User
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
type Mutation {
|
|
184
|
-
createUser(name: String!, email: String!, age: Int): User
|
|
185
|
-
deleteUser(id: ID!): Boolean
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
type User {
|
|
189
|
-
id: ID!
|
|
190
|
-
name: String!
|
|
191
|
-
email: String
|
|
192
|
-
age: Int
|
|
193
|
-
status: Status
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
enum Status { ACTIVE INACTIVE BANNED }
|
|
197
|
-
```
|
|
198
|
-
|
|
199
|
-
**What mcp2cli generates:**
|
|
200
|
-
|
|
201
|
-
```
|
|
202
|
-
$ mcp2cli --graphql https://api.example.com/graphql --list
|
|
203
|
-
|
|
204
|
-
query:
|
|
205
|
-
users List all users
|
|
206
|
-
user Get a user by ID
|
|
207
|
-
|
|
208
|
-
mutation:
|
|
209
|
-
create-user Create a new user
|
|
210
|
-
delete-user Delete a user by ID
|
|
211
|
-
|
|
212
|
-
$ mcp2cli --graphql https://api.example.com/graphql create-user --help
|
|
213
|
-
usage: mcp2cli create-user [--name NAME] [--email EMAIL] [--age AGE]
|
|
214
|
-
|
|
215
|
-
--name User name (String!, required)
|
|
216
|
-
--email User email (String!, required)
|
|
217
|
-
--age User age (Int)
|
|
218
|
-
|
|
219
|
-
$ mcp2cli --graphql https://api.example.com/graphql create-user --name "Alice" --email "alice@co.org"
|
|
220
|
-
{"id": "4", "name": "Alice", "email": "alice@co.org", "age": null, "status": null}
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
No SDL parsing, no code generation — just point and run.
|
|
224
|
-
|
|
225
|
-
### Tool search
|
|
226
|
-
|
|
227
|
-
```bash
|
|
228
|
-
# Search tools by name or description (case-insensitive substring match)
|
|
229
|
-
mcp2cli --mcp https://mcp.example.com/sse --search "task"
|
|
230
|
-
mcp2cli --spec ./openapi.json --search "create"
|
|
231
|
-
mcp2cli --mcp-stdio "npx @mcp/server" --search "deploy"
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
`--search` implies `--list` — it filters the tool listing to matching results.
|
|
183
|
+
mcp2cli introspects the endpoint, discovers queries and mutations, auto-generates selection sets, and constructs parameterized queries with proper variable declarations. No SDL parsing, no code generation — just point and run.
|
|
235
184
|
|
|
236
185
|
### Bake mode — save connection settings
|
|
237
186
|
|
|
@@ -347,121 +296,7 @@ Bake mode:
|
|
|
347
296
|
|
|
348
297
|
Subcommands and their flags are generated dynamically from the spec or MCP server tool definitions. Run `<subcommand> --help` for details.
|
|
349
298
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
If you've connected an LLM to more than a handful of tools, you've felt the pain. Every MCP server, every OpenAPI endpoint — their full schemas get injected into the system prompt on *every single turn*. Your 50-endpoint API costs 3,579 tokens of context *before the conversation even starts*, and that bill is paid again on every message, whether the model touches those tools or not.
|
|
353
|
-
|
|
354
|
-
This isn't a theoretical concern. 6 MCP servers with 84 tools consume ~15,540 tokens at session start. Converting those servers to CLIs and letting the LLM discover tools on-demand can slash that cost by 92-98%.
|
|
355
|
-
|
|
356
|
-
Even Anthropic recognized the problem, building [Tool Search](https://www.anthropic.com/engineering/advanced-tool-use) directly into their API — a deferred-loading pattern where tools are marked `defer_loading: true` and Claude discovers them via a search index (~500 tokens) instead of loading all schemas upfront. It typically cuts token usage by 85%. But when Tool Search fetches a tool, the full JSON Schema still enters context (~121 tokens/tool).
|
|
357
|
-
|
|
358
|
-
mcp2cli takes the CLI approach further.
|
|
359
|
-
|
|
360
|
-
## What mcp2cli adds
|
|
361
|
-
|
|
362
|
-
The idea is simple: give the LLM a CLI instead of raw tool schemas, and let it `--list` and `--help` its way to what it needs. mcp2cli builds on this with a few key differences:
|
|
363
|
-
|
|
364
|
-
- **No codegen, no recompilation.** Point mcp2cli at a spec URL or MCP server and the CLI exists immediately. When the server adds new endpoints, they appear on the next invocation — no rebuild step, no generated code to commit.
|
|
365
|
-
- **Provider-agnostic.** Tool Search is an Anthropic API feature. mcp2cli works with any LLM — Claude, GPT, Gemini, local models — because it's just a CLI tool the model can shell out to.
|
|
366
|
-
- **Compact discovery.** Tool Search defers loading but still injects full JSON schemas when a tool is fetched (~121 tokens/tool). mcp2cli's `--help` returns human-readable text that's typically cheaper than the raw schema, and `--list` summaries cost ~16 tokens/tool vs ~121 for native schemas.
|
|
367
|
-
- **OpenAPI and GraphQL support.** MCP isn't the only schema-rich protocol. mcp2cli handles OpenAPI specs (JSON or YAML, local or remote) and GraphQL endpoints (via introspection) with the same CLI interface, the same caching, and the same on-demand discovery. One tool for all three worlds.
|
|
368
|
-
- **Spec caching with TTL control.** Fetched specs and MCP tool lists are cached locally with configurable TTL, so repeated invocations don't hit the network. `--refresh` bypasses the cache when you need it.
|
|
369
|
-
|
|
370
|
-
## The numbers: how much context do you actually save?
|
|
371
|
-
|
|
372
|
-
We measured this. Not estimates — actual token counts using the cl100k_base tokenizer against real schemas, verified by [an automated test suite](tests/test_token_savings.py).
|
|
373
|
-
|
|
374
|
-
### What mcp2cli actually costs
|
|
375
|
-
|
|
376
|
-
Let's be upfront about what mcp2cli adds to context. It's not zero — it's just dramatically less than injecting full schemas.
|
|
377
|
-
|
|
378
|
-
| Component | Cost | When |
|
|
379
|
-
|---|--:|---|
|
|
380
|
-
| System prompt | 67 tokens | Every turn (fixed) |
|
|
381
|
-
| `--list` output | ~16 tokens/tool | Once per conversation |
|
|
382
|
-
| `--help` output | ~80-200 tokens/tool | Once per unique tool used |
|
|
383
|
-
| Tool call output | same as native | Per call |
|
|
384
|
-
|
|
385
|
-
The `--list` cost scales linearly with the number of tools — 30 tools costs ~464 tokens, 120 tools costs ~1,850 tokens. This is still 7-8x cheaper than the full schemas, and you only pay it once.
|
|
386
|
-
|
|
387
|
-
Compare that to native MCP injection: **~121 tokens per tool, every single turn**, whether the model uses those tools or not. For OpenAPI endpoints, it's ~72 tokens per endpoint per turn.
|
|
388
|
-
|
|
389
|
-
### Over a full conversation
|
|
390
|
-
|
|
391
|
-
Here's the total token cost across a realistic multi-turn conversation. The mcp2cli column includes all overhead: the system prompt on every turn, one `--list` discovery, `--help` for each unique tool the LLM actually uses, and tool call outputs.
|
|
392
|
-
|
|
393
|
-
**MCP servers:**
|
|
394
|
-
|
|
395
|
-
| Scenario | Turns | Unique tools used | Native total | mcp2cli total | Saved |
|
|
396
|
-
|---|--:|--:|--:|--:|--:|
|
|
397
|
-
| Task manager (30 tools) | 15 | 5 | 54,525 | 2,309 | **96%** |
|
|
398
|
-
| Multi-server (80 tools) | 20 | 8 | 193,360 | 3,897 | **98%** |
|
|
399
|
-
| Full platform (120 tools) | 25 | 10 | 362,350 | 5,181 | **99%** |
|
|
400
|
-
|
|
401
|
-
**OpenAPI specs:**
|
|
402
|
-
|
|
403
|
-
| Scenario | Turns | Unique endpoints used | Native total | mcp2cli total | Saved |
|
|
404
|
-
|---|--:|--:|--:|--:|--:|
|
|
405
|
-
| Petstore (5 endpoints) | 10 | 3 | 3,730 | 1,199 | **68%** |
|
|
406
|
-
| Medium API (20 endpoints) | 15 | 5 | 21,720 | 1,905 | **91%** |
|
|
407
|
-
| Large API (50 endpoints) | 20 | 8 | 71,940 | 2,810 | **96%** |
|
|
408
|
-
| Enterprise API (200 endpoints) | 25 | 10 | 358,425 | 3,925 | **99%** |
|
|
409
|
-
|
|
410
|
-
A 120-tool MCP platform over 25 turns: **357,169 tokens saved**.
|
|
411
|
-
|
|
412
|
-
### Turn-by-turn: watching the gap widen
|
|
413
|
-
|
|
414
|
-
Here's a 30-tool MCP server over 10 turns. The mcp2cli column includes the real costs: `--list` discovery on turn 1, `--help` + tool output when each new tool is first used.
|
|
415
|
-
|
|
416
|
-
```
|
|
417
|
-
Turn Native mcp2cli Savings
|
|
418
|
-
──────────────────────────────────────────────────────────
|
|
419
|
-
1 3,619 531 3,088 ← --list (464 tokens)
|
|
420
|
-
2 7,238 598 6,640
|
|
421
|
-
3 10,887 815 10,072 ← --help (120) + tool call
|
|
422
|
-
4 14,506 882 13,624
|
|
423
|
-
5 18,155 1,099 17,056 ← --help (120) + tool call
|
|
424
|
-
6 21,774 1,166 20,608
|
|
425
|
-
7 25,423 1,383 24,040 ← --help (120) + tool call
|
|
426
|
-
8 29,042 1,450 27,592
|
|
427
|
-
9 32,691 1,667 31,024 ← --help (120) + tool call
|
|
428
|
-
10 36,310 1,734 34,576
|
|
429
|
-
|
|
430
|
-
Total: 34,576 tokens saved (95.2%)
|
|
431
|
-
```
|
|
432
|
-
|
|
433
|
-
### Why the gap is so large
|
|
434
|
-
|
|
435
|
-
**Native MCP approach** — pay the full schema tax on every turn:
|
|
436
|
-
```
|
|
437
|
-
System prompt: "You have these 30 tools: [3,619 tokens of JSON schemas]"
|
|
438
|
-
→ 3,619 tokens consumed per turn, whether used or not
|
|
439
|
-
→ 10 turns = 36,310 tokens
|
|
440
|
-
```
|
|
441
|
-
|
|
442
|
-
**mcp2cli approach** — pay only for what you use:
|
|
443
|
-
```
|
|
444
|
-
System prompt: "Use mcp2cli --mcp <url> <command> [--flags]" (67 tokens/turn)
|
|
445
|
-
→ mcp2cli --mcp <url> --list (464 tokens, once)
|
|
446
|
-
→ mcp2cli --mcp <url> create-task --help (120 tokens, once per tool)
|
|
447
|
-
→ mcp2cli --mcp <url> create-task --title "Fix bug" (0 extra tokens)
|
|
448
|
-
→ 10 turns, 4 unique tools = 1,734 tokens
|
|
449
|
-
```
|
|
450
|
-
|
|
451
|
-
The LLM discovers what it needs, when it needs it. Everything else stays out of context.
|
|
452
|
-
|
|
453
|
-
### The multi-server problem
|
|
454
|
-
|
|
455
|
-
This is where it really hurts. Connect 3 MCP servers (a task manager, a filesystem server, and a database server — 60 tools total) and you're paying 7,238 tokens per turn. Over a 20-turn conversation, that's **145,060 tokens** just for tool schemas. mcp2cli reduces that to **3,288 tokens** — a **97.7% reduction** — even after accounting for `--list` discovery (928 tokens) and `--help` for 6 unique tools (720 tokens).
|
|
456
|
-
|
|
457
|
-
## How it works
|
|
458
|
-
|
|
459
|
-
1. **Load** -- Fetch the OpenAPI spec or connect to the MCP server. Resolve `$ref`s. Cache for reuse.
|
|
460
|
-
2. **Extract** -- Walk the spec paths/tools and produce a uniform list of command definitions with typed parameters.
|
|
461
|
-
3. **Build** -- Generate an argparse parser with subcommands, flags, types, choices, and help text.
|
|
462
|
-
4. **Execute** -- Dispatch the parsed args as an HTTP request (OpenAPI) or tool call (MCP).
|
|
463
|
-
|
|
464
|
-
Both adapters produce the same internal `CommandDef` structure, so the CLI builder and output handling are shared.
|
|
299
|
+
> For token savings analysis, architecture details, and comparison to Anthropic's Tool Search, see the **[full writeup on the OCAI blog](https://www.orangecountyai.com/blog/mcp2cli-one-cli-for-every-api-zero-wasted-tokens)**.
|
|
465
300
|
|
|
466
301
|
## Development
|
|
467
302
|
|
|
@@ -49,25 +49,35 @@ mcp2cli --mcp https://mcp.example.com/sse --auth-header "x-api-key:sk-..." \
|
|
|
49
49
|
|
|
50
50
|
# Force a specific transport (skip streamable HTTP fallback dance)
|
|
51
51
|
mcp2cli --mcp https://mcp.example.com/sse --transport sse --list
|
|
52
|
+
|
|
53
|
+
# Search tools by name or description (case-insensitive substring match)
|
|
54
|
+
mcp2cli --mcp https://mcp.example.com/sse --search "task"
|
|
52
55
|
```
|
|
53
56
|
|
|
57
|
+
`--search` implies `--list` and works across all modes (`--mcp`, `--spec`, `--graphql`, `--mcp-stdio`).
|
|
58
|
+
|
|
54
59
|
### OAuth authentication
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
caching, and refresh automatically.
|
|
61
|
+
APIs that require OAuth are supported out of the box — across MCP, OpenAPI, and GraphQL modes.
|
|
62
|
+
mcp2cli handles token acquisition, caching, and refresh automatically.
|
|
58
63
|
|
|
59
64
|
```bash
|
|
60
65
|
# Authorization code + PKCE flow (opens browser for login)
|
|
61
66
|
mcp2cli --mcp https://mcp.example.com/sse --oauth --list
|
|
67
|
+
mcp2cli --spec https://api.example.com/openapi.json --oauth --list
|
|
68
|
+
mcp2cli --graphql https://api.example.com/graphql --oauth --list
|
|
62
69
|
|
|
63
70
|
# Client credentials flow (machine-to-machine, no browser)
|
|
64
|
-
mcp2cli --
|
|
71
|
+
mcp2cli --spec https://api.example.com/openapi.json \
|
|
65
72
|
--oauth-client-id "my-client-id" \
|
|
66
73
|
--oauth-client-secret "my-secret" \
|
|
67
|
-
|
|
74
|
+
list-pets
|
|
68
75
|
|
|
69
76
|
# With specific scopes
|
|
70
|
-
mcp2cli --
|
|
77
|
+
mcp2cli --graphql https://api.example.com/graphql --oauth --oauth-scope "read write" users
|
|
78
|
+
|
|
79
|
+
# Local spec file — use --base-url for OAuth discovery
|
|
80
|
+
mcp2cli --spec ./openapi.json --base-url https://api.example.com --oauth --list
|
|
71
81
|
```
|
|
72
82
|
|
|
73
83
|
Tokens are persisted in `~/.cache/mcp2cli/oauth/` so subsequent calls reuse existing tokens
|
|
@@ -151,68 +161,7 @@ mcp2cli --graphql https://api.example.com/graphql users --fields "id name email"
|
|
|
151
161
|
mcp2cli --graphql https://api.example.com/graphql --auth-header "Authorization:Bearer tok_..." users
|
|
152
162
|
```
|
|
153
163
|
|
|
154
|
-
mcp2cli introspects the endpoint, discovers queries and mutations, auto-generates selection sets, and constructs parameterized queries with proper variable declarations.
|
|
155
|
-
|
|
156
|
-
**GraphQL schema:**
|
|
157
|
-
|
|
158
|
-
```graphql
|
|
159
|
-
type Query {
|
|
160
|
-
users: [User!]!
|
|
161
|
-
user(id: ID!): User
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
type Mutation {
|
|
165
|
-
createUser(name: String!, email: String!, age: Int): User
|
|
166
|
-
deleteUser(id: ID!): Boolean
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
type User {
|
|
170
|
-
id: ID!
|
|
171
|
-
name: String!
|
|
172
|
-
email: String
|
|
173
|
-
age: Int
|
|
174
|
-
status: Status
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
enum Status { ACTIVE INACTIVE BANNED }
|
|
178
|
-
```
|
|
179
|
-
|
|
180
|
-
**What mcp2cli generates:**
|
|
181
|
-
|
|
182
|
-
```
|
|
183
|
-
$ mcp2cli --graphql https://api.example.com/graphql --list
|
|
184
|
-
|
|
185
|
-
query:
|
|
186
|
-
users List all users
|
|
187
|
-
user Get a user by ID
|
|
188
|
-
|
|
189
|
-
mutation:
|
|
190
|
-
create-user Create a new user
|
|
191
|
-
delete-user Delete a user by ID
|
|
192
|
-
|
|
193
|
-
$ mcp2cli --graphql https://api.example.com/graphql create-user --help
|
|
194
|
-
usage: mcp2cli create-user [--name NAME] [--email EMAIL] [--age AGE]
|
|
195
|
-
|
|
196
|
-
--name User name (String!, required)
|
|
197
|
-
--email User email (String!, required)
|
|
198
|
-
--age User age (Int)
|
|
199
|
-
|
|
200
|
-
$ mcp2cli --graphql https://api.example.com/graphql create-user --name "Alice" --email "alice@co.org"
|
|
201
|
-
{"id": "4", "name": "Alice", "email": "alice@co.org", "age": null, "status": null}
|
|
202
|
-
```
|
|
203
|
-
|
|
204
|
-
No SDL parsing, no code generation — just point and run.
|
|
205
|
-
|
|
206
|
-
### Tool search
|
|
207
|
-
|
|
208
|
-
```bash
|
|
209
|
-
# Search tools by name or description (case-insensitive substring match)
|
|
210
|
-
mcp2cli --mcp https://mcp.example.com/sse --search "task"
|
|
211
|
-
mcp2cli --spec ./openapi.json --search "create"
|
|
212
|
-
mcp2cli --mcp-stdio "npx @mcp/server" --search "deploy"
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
`--search` implies `--list` — it filters the tool listing to matching results.
|
|
164
|
+
mcp2cli introspects the endpoint, discovers queries and mutations, auto-generates selection sets, and constructs parameterized queries with proper variable declarations. No SDL parsing, no code generation — just point and run.
|
|
216
165
|
|
|
217
166
|
### Bake mode — save connection settings
|
|
218
167
|
|
|
@@ -328,121 +277,7 @@ Bake mode:
|
|
|
328
277
|
|
|
329
278
|
Subcommands and their flags are generated dynamically from the spec or MCP server tool definitions. Run `<subcommand> --help` for details.
|
|
330
279
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
If you've connected an LLM to more than a handful of tools, you've felt the pain. Every MCP server, every OpenAPI endpoint — their full schemas get injected into the system prompt on *every single turn*. Your 50-endpoint API costs 3,579 tokens of context *before the conversation even starts*, and that bill is paid again on every message, whether the model touches those tools or not.
|
|
334
|
-
|
|
335
|
-
This isn't a theoretical concern. 6 MCP servers with 84 tools consume ~15,540 tokens at session start. Converting those servers to CLIs and letting the LLM discover tools on-demand can slash that cost by 92-98%.
|
|
336
|
-
|
|
337
|
-
Even Anthropic recognized the problem, building [Tool Search](https://www.anthropic.com/engineering/advanced-tool-use) directly into their API — a deferred-loading pattern where tools are marked `defer_loading: true` and Claude discovers them via a search index (~500 tokens) instead of loading all schemas upfront. It typically cuts token usage by 85%. But when Tool Search fetches a tool, the full JSON Schema still enters context (~121 tokens/tool).
|
|
338
|
-
|
|
339
|
-
mcp2cli takes the CLI approach further.
|
|
340
|
-
|
|
341
|
-
## What mcp2cli adds
|
|
342
|
-
|
|
343
|
-
The idea is simple: give the LLM a CLI instead of raw tool schemas, and let it `--list` and `--help` its way to what it needs. mcp2cli builds on this with a few key differences:
|
|
344
|
-
|
|
345
|
-
- **No codegen, no recompilation.** Point mcp2cli at a spec URL or MCP server and the CLI exists immediately. When the server adds new endpoints, they appear on the next invocation — no rebuild step, no generated code to commit.
|
|
346
|
-
- **Provider-agnostic.** Tool Search is an Anthropic API feature. mcp2cli works with any LLM — Claude, GPT, Gemini, local models — because it's just a CLI tool the model can shell out to.
|
|
347
|
-
- **Compact discovery.** Tool Search defers loading but still injects full JSON schemas when a tool is fetched (~121 tokens/tool). mcp2cli's `--help` returns human-readable text that's typically cheaper than the raw schema, and `--list` summaries cost ~16 tokens/tool vs ~121 for native schemas.
|
|
348
|
-
- **OpenAPI and GraphQL support.** MCP isn't the only schema-rich protocol. mcp2cli handles OpenAPI specs (JSON or YAML, local or remote) and GraphQL endpoints (via introspection) with the same CLI interface, the same caching, and the same on-demand discovery. One tool for all three worlds.
|
|
349
|
-
- **Spec caching with TTL control.** Fetched specs and MCP tool lists are cached locally with configurable TTL, so repeated invocations don't hit the network. `--refresh` bypasses the cache when you need it.
|
|
350
|
-
|
|
351
|
-
## The numbers: how much context do you actually save?
|
|
352
|
-
|
|
353
|
-
We measured this. Not estimates — actual token counts using the cl100k_base tokenizer against real schemas, verified by [an automated test suite](tests/test_token_savings.py).
|
|
354
|
-
|
|
355
|
-
### What mcp2cli actually costs
|
|
356
|
-
|
|
357
|
-
Let's be upfront about what mcp2cli adds to context. It's not zero — it's just dramatically less than injecting full schemas.
|
|
358
|
-
|
|
359
|
-
| Component | Cost | When |
|
|
360
|
-
|---|--:|---|
|
|
361
|
-
| System prompt | 67 tokens | Every turn (fixed) |
|
|
362
|
-
| `--list` output | ~16 tokens/tool | Once per conversation |
|
|
363
|
-
| `--help` output | ~80-200 tokens/tool | Once per unique tool used |
|
|
364
|
-
| Tool call output | same as native | Per call |
|
|
365
|
-
|
|
366
|
-
The `--list` cost scales linearly with the number of tools — 30 tools costs ~464 tokens, 120 tools costs ~1,850 tokens. This is still 7-8x cheaper than the full schemas, and you only pay it once.
|
|
367
|
-
|
|
368
|
-
Compare that to native MCP injection: **~121 tokens per tool, every single turn**, whether the model uses those tools or not. For OpenAPI endpoints, it's ~72 tokens per endpoint per turn.
|
|
369
|
-
|
|
370
|
-
### Over a full conversation
|
|
371
|
-
|
|
372
|
-
Here's the total token cost across a realistic multi-turn conversation. The mcp2cli column includes all overhead: the system prompt on every turn, one `--list` discovery, `--help` for each unique tool the LLM actually uses, and tool call outputs.
|
|
373
|
-
|
|
374
|
-
**MCP servers:**
|
|
375
|
-
|
|
376
|
-
| Scenario | Turns | Unique tools used | Native total | mcp2cli total | Saved |
|
|
377
|
-
|---|--:|--:|--:|--:|--:|
|
|
378
|
-
| Task manager (30 tools) | 15 | 5 | 54,525 | 2,309 | **96%** |
|
|
379
|
-
| Multi-server (80 tools) | 20 | 8 | 193,360 | 3,897 | **98%** |
|
|
380
|
-
| Full platform (120 tools) | 25 | 10 | 362,350 | 5,181 | **99%** |
|
|
381
|
-
|
|
382
|
-
**OpenAPI specs:**
|
|
383
|
-
|
|
384
|
-
| Scenario | Turns | Unique endpoints used | Native total | mcp2cli total | Saved |
|
|
385
|
-
|---|--:|--:|--:|--:|--:|
|
|
386
|
-
| Petstore (5 endpoints) | 10 | 3 | 3,730 | 1,199 | **68%** |
|
|
387
|
-
| Medium API (20 endpoints) | 15 | 5 | 21,720 | 1,905 | **91%** |
|
|
388
|
-
| Large API (50 endpoints) | 20 | 8 | 71,940 | 2,810 | **96%** |
|
|
389
|
-
| Enterprise API (200 endpoints) | 25 | 10 | 358,425 | 3,925 | **99%** |
|
|
390
|
-
|
|
391
|
-
A 120-tool MCP platform over 25 turns: **357,169 tokens saved**.
|
|
392
|
-
|
|
393
|
-
### Turn-by-turn: watching the gap widen
|
|
394
|
-
|
|
395
|
-
Here's a 30-tool MCP server over 10 turns. The mcp2cli column includes the real costs: `--list` discovery on turn 1, `--help` + tool output when each new tool is first used.
|
|
396
|
-
|
|
397
|
-
```
|
|
398
|
-
Turn Native mcp2cli Savings
|
|
399
|
-
──────────────────────────────────────────────────────────
|
|
400
|
-
1 3,619 531 3,088 ← --list (464 tokens)
|
|
401
|
-
2 7,238 598 6,640
|
|
402
|
-
3 10,887 815 10,072 ← --help (120) + tool call
|
|
403
|
-
4 14,506 882 13,624
|
|
404
|
-
5 18,155 1,099 17,056 ← --help (120) + tool call
|
|
405
|
-
6 21,774 1,166 20,608
|
|
406
|
-
7 25,423 1,383 24,040 ← --help (120) + tool call
|
|
407
|
-
8 29,042 1,450 27,592
|
|
408
|
-
9 32,691 1,667 31,024 ← --help (120) + tool call
|
|
409
|
-
10 36,310 1,734 34,576
|
|
410
|
-
|
|
411
|
-
Total: 34,576 tokens saved (95.2%)
|
|
412
|
-
```
|
|
413
|
-
|
|
414
|
-
### Why the gap is so large
|
|
415
|
-
|
|
416
|
-
**Native MCP approach** — pay the full schema tax on every turn:
|
|
417
|
-
```
|
|
418
|
-
System prompt: "You have these 30 tools: [3,619 tokens of JSON schemas]"
|
|
419
|
-
→ 3,619 tokens consumed per turn, whether used or not
|
|
420
|
-
→ 10 turns = 36,310 tokens
|
|
421
|
-
```
|
|
422
|
-
|
|
423
|
-
**mcp2cli approach** — pay only for what you use:
|
|
424
|
-
```
|
|
425
|
-
System prompt: "Use mcp2cli --mcp <url> <command> [--flags]" (67 tokens/turn)
|
|
426
|
-
→ mcp2cli --mcp <url> --list (464 tokens, once)
|
|
427
|
-
→ mcp2cli --mcp <url> create-task --help (120 tokens, once per tool)
|
|
428
|
-
→ mcp2cli --mcp <url> create-task --title "Fix bug" (0 extra tokens)
|
|
429
|
-
→ 10 turns, 4 unique tools = 1,734 tokens
|
|
430
|
-
```
|
|
431
|
-
|
|
432
|
-
The LLM discovers what it needs, when it needs it. Everything else stays out of context.
|
|
433
|
-
|
|
434
|
-
### The multi-server problem
|
|
435
|
-
|
|
436
|
-
This is where it really hurts. Connect 3 MCP servers (a task manager, a filesystem server, and a database server — 60 tools total) and you're paying 7,238 tokens per turn. Over a 20-turn conversation, that's **145,060 tokens** just for tool schemas. mcp2cli reduces that to **3,288 tokens** — a **97.7% reduction** — even after accounting for `--list` discovery (928 tokens) and `--help` for 6 unique tools (720 tokens).
|
|
437
|
-
|
|
438
|
-
## How it works
|
|
439
|
-
|
|
440
|
-
1. **Load** -- Fetch the OpenAPI spec or connect to the MCP server. Resolve `$ref`s. Cache for reuse.
|
|
441
|
-
2. **Extract** -- Walk the spec paths/tools and produce a uniform list of command definitions with typed parameters.
|
|
442
|
-
3. **Build** -- Generate an argparse parser with subcommands, flags, types, choices, and help text.
|
|
443
|
-
4. **Execute** -- Dispatch the parsed args as an HTTP request (OpenAPI) or tool call (MCP).
|
|
444
|
-
|
|
445
|
-
Both adapters produce the same internal `CommandDef` structure, so the CLI builder and output handling are shared.
|
|
280
|
+
> For token savings analysis, architecture details, and comparison to Anthropic's Tool Search, see the **[full writeup on the OCAI blog](https://www.orangecountyai.com/blog/mcp2cli-one-cli-for-every-api-zero-wasted-tokens)**.
|
|
446
281
|
|
|
447
282
|
## Development
|
|
448
283
|
|
|
@@ -371,7 +371,7 @@ def build_oauth_provider(
|
|
|
371
371
|
client_secret: str | None = None,
|
|
372
372
|
scope: str | None = None,
|
|
373
373
|
) -> "httpx.Auth":
|
|
374
|
-
"""Build an OAuth provider for
|
|
374
|
+
"""Build an OAuth provider for HTTP connections.
|
|
375
375
|
|
|
376
376
|
If client_id and client_secret are provided, uses client credentials flow.
|
|
377
377
|
Otherwise, uses authorization code + PKCE with a local callback server.
|
|
@@ -482,6 +482,7 @@ def load_openapi_spec(
|
|
|
482
482
|
cache_key: str | None,
|
|
483
483
|
ttl: int,
|
|
484
484
|
refresh: bool,
|
|
485
|
+
oauth_provider: "httpx.Auth | None" = None,
|
|
485
486
|
) -> dict:
|
|
486
487
|
is_url = source.startswith("http://") or source.startswith("https://")
|
|
487
488
|
|
|
@@ -493,7 +494,7 @@ def load_openapi_spec(
|
|
|
493
494
|
return cached
|
|
494
495
|
|
|
495
496
|
headers = dict(auth_headers)
|
|
496
|
-
with httpx.Client(timeout=30) as client:
|
|
497
|
+
with httpx.Client(timeout=30, auth=oauth_provider) as client:
|
|
497
498
|
resp = client.get(source, headers=headers)
|
|
498
499
|
resp.raise_for_status()
|
|
499
500
|
raw = resp.text
|
|
@@ -840,6 +841,7 @@ def load_graphql_schema(
|
|
|
840
841
|
cache_key: str | None,
|
|
841
842
|
ttl: int,
|
|
842
843
|
refresh: bool,
|
|
844
|
+
oauth_provider: "httpx.Auth | None" = None,
|
|
843
845
|
) -> dict:
|
|
844
846
|
"""POST introspection query to a GraphQL endpoint, with caching."""
|
|
845
847
|
key = cache_key or cache_key_for(f"graphql:{url}")
|
|
@@ -850,7 +852,7 @@ def load_graphql_schema(
|
|
|
850
852
|
|
|
851
853
|
headers = dict(auth_headers)
|
|
852
854
|
headers.setdefault("Content-Type", "application/json")
|
|
853
|
-
with httpx.Client(timeout=30) as client:
|
|
855
|
+
with httpx.Client(timeout=30, auth=oauth_provider) as client:
|
|
854
856
|
resp = client.post(
|
|
855
857
|
url,
|
|
856
858
|
headers=headers,
|
|
@@ -999,6 +1001,7 @@ def execute_graphql(
|
|
|
999
1001
|
raw: bool,
|
|
1000
1002
|
toon: bool = False,
|
|
1001
1003
|
fields_override: str | None = None,
|
|
1004
|
+
oauth_provider: "httpx.Auth | None" = None,
|
|
1002
1005
|
):
|
|
1003
1006
|
"""Build and execute a GraphQL query/mutation."""
|
|
1004
1007
|
types_by_name = {t["name"]: t for t in schema.get("types", []) if t.get("name")}
|
|
@@ -1044,7 +1047,7 @@ def execute_graphql(
|
|
|
1044
1047
|
headers = dict(auth_headers)
|
|
1045
1048
|
headers.setdefault("Content-Type", "application/json")
|
|
1046
1049
|
|
|
1047
|
-
with httpx.Client(timeout=60) as client:
|
|
1050
|
+
with httpx.Client(timeout=60, auth=oauth_provider) as client:
|
|
1048
1051
|
resp = client.post(
|
|
1049
1052
|
url,
|
|
1050
1053
|
headers=headers,
|
|
@@ -1084,9 +1087,10 @@ def handle_graphql(
|
|
|
1084
1087
|
refresh: bool,
|
|
1085
1088
|
toon: bool = False,
|
|
1086
1089
|
fields_override: str | None = None,
|
|
1090
|
+
oauth_provider: "httpx.Auth | None" = None,
|
|
1087
1091
|
):
|
|
1088
1092
|
"""Top-level handler for --graphql mode."""
|
|
1089
|
-
schema = load_graphql_schema(url, auth_headers, cache_key, ttl, refresh)
|
|
1093
|
+
schema = load_graphql_schema(url, auth_headers, cache_key, ttl, refresh, oauth_provider=oauth_provider)
|
|
1090
1094
|
commands = extract_graphql_commands(schema)
|
|
1091
1095
|
|
|
1092
1096
|
if list_mode:
|
|
@@ -1110,7 +1114,7 @@ def handle_graphql(
|
|
|
1110
1114
|
cmd: CommandDef = args._cmd
|
|
1111
1115
|
execute_graphql(
|
|
1112
1116
|
args, cmd, url, schema, auth_headers, pretty, raw, toon=toon,
|
|
1113
|
-
fields_override=fields_override,
|
|
1117
|
+
fields_override=fields_override, oauth_provider=oauth_provider,
|
|
1114
1118
|
)
|
|
1115
1119
|
|
|
1116
1120
|
|
|
@@ -1565,6 +1569,7 @@ def execute_openapi(
|
|
|
1565
1569
|
pretty: bool,
|
|
1566
1570
|
raw: bool,
|
|
1567
1571
|
toon: bool = False,
|
|
1572
|
+
oauth_provider: "httpx.Auth | None" = None,
|
|
1568
1573
|
):
|
|
1569
1574
|
path = cmd.path or ""
|
|
1570
1575
|
# Substitute path parameters
|
|
@@ -1616,7 +1621,7 @@ def execute_openapi(
|
|
|
1616
1621
|
if not body:
|
|
1617
1622
|
body = None
|
|
1618
1623
|
|
|
1619
|
-
with httpx.Client(timeout=60) as client:
|
|
1624
|
+
with httpx.Client(timeout=60, auth=oauth_provider) as client:
|
|
1620
1625
|
resp = client.request(
|
|
1621
1626
|
(cmd.method or "get").upper(),
|
|
1622
1627
|
url,
|
|
@@ -2930,9 +2935,22 @@ def _main_impl(argv: list[str], bake_config: BakeConfig | None = None):
|
|
|
2930
2935
|
file=sys.stderr,
|
|
2931
2936
|
)
|
|
2932
2937
|
sys.exit(1)
|
|
2933
|
-
if
|
|
2938
|
+
if pre_args.mcp_stdio:
|
|
2934
2939
|
print(
|
|
2935
|
-
"Error: OAuth is
|
|
2940
|
+
"Error: OAuth is not supported with --mcp-stdio", file=sys.stderr
|
|
2941
|
+
)
|
|
2942
|
+
sys.exit(1)
|
|
2943
|
+
# Determine OAuth server URL for discovery
|
|
2944
|
+
server_url = pre_args.mcp or pre_args.graphql
|
|
2945
|
+
if not server_url and pre_args.spec:
|
|
2946
|
+
if pre_args.spec.startswith("http"):
|
|
2947
|
+
server_url = pre_args.spec
|
|
2948
|
+
else:
|
|
2949
|
+
server_url = pre_args.base_url
|
|
2950
|
+
if not server_url:
|
|
2951
|
+
print(
|
|
2952
|
+
"Error: OAuth requires an HTTP URL (use --base-url with local spec files)",
|
|
2953
|
+
file=sys.stderr,
|
|
2936
2954
|
)
|
|
2937
2955
|
sys.exit(1)
|
|
2938
2956
|
client_id = (
|
|
@@ -2946,7 +2964,7 @@ def _main_impl(argv: list[str], bake_config: BakeConfig | None = None):
|
|
|
2946
2964
|
else None
|
|
2947
2965
|
)
|
|
2948
2966
|
oauth_provider = build_oauth_provider(
|
|
2949
|
-
|
|
2967
|
+
server_url,
|
|
2950
2968
|
client_id=client_id,
|
|
2951
2969
|
client_secret=client_secret,
|
|
2952
2970
|
scope=pre_args.oauth_scope,
|
|
@@ -3129,6 +3147,7 @@ def _main_impl(argv: list[str], bake_config: BakeConfig | None = None):
|
|
|
3129
3147
|
pre_args.refresh,
|
|
3130
3148
|
toon=pre_args.toon,
|
|
3131
3149
|
fields_override=pre_args.fields,
|
|
3150
|
+
oauth_provider=oauth_provider,
|
|
3132
3151
|
)
|
|
3133
3152
|
return
|
|
3134
3153
|
|
|
@@ -3168,6 +3187,7 @@ def _main_impl(argv: list[str], bake_config: BakeConfig | None = None):
|
|
|
3168
3187
|
pre_args.cache_key,
|
|
3169
3188
|
pre_args.cache_ttl,
|
|
3170
3189
|
pre_args.refresh,
|
|
3190
|
+
oauth_provider=oauth_provider,
|
|
3171
3191
|
)
|
|
3172
3192
|
commands = extract_openapi_commands(spec)
|
|
3173
3193
|
if bake_config:
|
|
@@ -3229,6 +3249,7 @@ def _main_impl(argv: list[str], bake_config: BakeConfig | None = None):
|
|
|
3229
3249
|
pre_args.pretty,
|
|
3230
3250
|
pre_args.raw,
|
|
3231
3251
|
toon=pre_args.toon,
|
|
3252
|
+
oauth_provider=oauth_provider,
|
|
3232
3253
|
)
|
|
3233
3254
|
|
|
3234
3255
|
|
|
File without changes
|
|
File without changes
|