casual-mcp 0.3.1__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- casual_mcp-0.6.0/PKG-INFO +691 -0
- casual_mcp-0.6.0/README.md +667 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/pyproject.toml +25 -21
- casual_mcp-0.6.0/src/casual_mcp/__init__.py +24 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/cli.py +24 -24
- casual_mcp-0.6.0/src/casual_mcp/convert_tools.py +68 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/logging.py +6 -2
- casual_mcp-0.6.0/src/casual_mcp/main.py +120 -0
- casual_mcp-0.6.0/src/casual_mcp/mcp_tool_chat.py +232 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/__init__.py +21 -8
- casual_mcp-0.6.0/src/casual_mcp/models/chat_stats.py +37 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/config.py +2 -2
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/generation_error.py +1 -1
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/model_config.py +3 -3
- casual_mcp-0.6.0/src/casual_mcp/provider_factory.py +47 -0
- casual_mcp-0.6.0/src/casual_mcp/tool_cache.py +114 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/utils.py +18 -11
- casual_mcp-0.6.0/src/casual_mcp.egg-info/PKG-INFO +691 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/SOURCES.txt +11 -7
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/requires.txt +3 -10
- casual_mcp-0.6.0/tests/test_chat_stats.py +118 -0
- casual_mcp-0.6.0/tests/test_mcp_tool_chat.py +499 -0
- casual_mcp-0.6.0/tests/test_provider_factory.py +106 -0
- casual_mcp-0.6.0/tests/test_session_management.py +57 -0
- casual_mcp-0.6.0/tests/test_tool_cache.py +173 -0
- casual_mcp-0.6.0/tests/test_tools.py +114 -0
- casual_mcp-0.6.0/tests/test_utils.py +130 -0
- casual_mcp-0.3.1/PKG-INFO +0 -398
- casual_mcp-0.3.1/README.md +0 -368
- casual_mcp-0.3.1/src/casual_mcp/__init__.py +0 -13
- casual_mcp-0.3.1/src/casual_mcp/main.py +0 -119
- casual_mcp-0.3.1/src/casual_mcp/mcp_tool_chat.py +0 -154
- casual_mcp-0.3.1/src/casual_mcp/models/messages.py +0 -31
- casual_mcp-0.3.1/src/casual_mcp/models/tool_call.py +0 -14
- casual_mcp-0.3.1/src/casual_mcp/providers/__init__.py +0 -0
- casual_mcp-0.3.1/src/casual_mcp/providers/abstract_provider.py +0 -15
- casual_mcp-0.3.1/src/casual_mcp/providers/ollama_provider.py +0 -72
- casual_mcp-0.3.1/src/casual_mcp/providers/openai_provider.py +0 -178
- casual_mcp-0.3.1/src/casual_mcp/providers/provider_factory.py +0 -56
- casual_mcp-0.3.1/src/casual_mcp.egg-info/PKG-INFO +0 -398
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/LICENSE +0 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/setup.cfg +0 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/mcp_server_config.py +0 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/dependency_links.txt +0 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/entry_points.txt +0 -0
- {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/top_level.txt +0 -0
@@ -0,0 +1,691 @@
Metadata-Version: 2.4
Name: casual-mcp
Version: 0.6.0
Summary: Multi-server MCP client for LLM tool orchestration
Author: Alex Stansfield
License: MIT
Project-URL: Homepage, https://github.com/casualgenius/casual-mcp
Project-URL: Repository, https://github.com/casualgenius/casual-mcp
Project-URL: Issue Tracker, https://github.com/casualgenius/casual-mcp/issues
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: casual-llm[openai]>=0.4.3
Requires-Dist: dateparser>=1.2.1
Requires-Dist: fastapi>=0.115.12
Requires-Dist: fastmcp>=2.12.4
Requires-Dist: jinja2>=3.1.6
Requires-Dist: python-dotenv>=1.1.0
Requires-Dist: requests>=2.32.3
Requires-Dist: rich>=14.0.0
Requires-Dist: typer>=0.19.2
Requires-Dist: uvicorn>=0.34.2
Dynamic: license-file

# 🧠 Casual MCP

**Casual MCP** is a Python framework for building, evaluating, and serving LLMs with tool-calling capabilities using [Model Context Protocol (MCP)](https://modelcontextprotocol.io).
It includes:

- ✅ A multi-server MCP client using [FastMCP](https://github.com/jlowin/fastmcp)
- ✅ Provider support for OpenAI and Ollama (powered by [casual-llm](https://github.com/AlexStansfield/casual-llm))
- ✅ A recursive tool-calling chat loop
- ✅ Usage statistics tracking (tokens, tool calls, LLM calls)
- ✅ System prompt templating with Jinja2
- ✅ A basic API exposing a chat endpoint

## ✨ Features

- Plug-and-play multi-server tool orchestration
- OpenAI and Ollama LLM providers (via casual-llm)
- Usage statistics tracking (tokens, tool calls, LLM calls)
- Prompt templating with Jinja2
- Configurable via JSON
- CLI and API access
- Extensible architecture

## 🔧 Installation

### Uv

```bash
uv add casual-mcp
```

### Pip

```bash
pip install casual-mcp
```

Or for development:

```bash
git clone https://github.com/casualgenius/casual-mcp.git
cd casual-mcp
uv sync --group dev
```

## 🧩 System Prompt Templates

System prompts are defined as [Jinja2](https://jinja.palletsprojects.com) templates in the `prompt-templates/` directory.

They are used in the config file to specify a system prompt to use per model.

This allows you to define custom prompts for each model — useful when using models that do not natively support tools. Templates are passed the tool list in the `tools` variable.

```jinja2
# prompt-templates/example_prompt.j2
Here is a list of functions in JSON format that you can invoke:
[
{% for tool in tools %}
  {
    "name": "{{ tool.name }}",
    "description": "{{ tool.description }}",
    "parameters": {
    {% for param_name, param in tool.inputSchema.items() %}
      "{{ param_name }}": {
        "description": "{{ param.description }}",
        "type": "{{ param.type }}"{% if param.default is defined %},
        "default": "{{ param.default }}"{% endif %}
      }{% if not loop.last %},{% endif %}
    {% endfor %}
    }
  }{% if not loop.last %},{% endif %}
{% endfor %}
]
```

## ⚙️ Configuration File (`casual_mcp_config.json`)

📄 See the [Programmatic Usage](#-programmatic-usage) section to build configs and messages with typed models.

The CLI and API can be configured using a `casual_mcp_config.json` file that defines:

- 🔧 Available **models** and their providers
- 🧰 Available **MCP tool servers**
- 🧩 Optional tool namespacing behavior

### 🔸 Example

```json
{
  "models": {
    "gpt-4.1": {
      "provider": "openai",
      "model": "gpt-4.1"
    },
    "lm-qwen-3": {
      "provider": "openai",
      "endpoint": "http://localhost:1234/v1",
      "model": "qwen3-8b",
      "template": "lm-studio-native-tools"
    },
    "ollama-qwen": {
      "provider": "ollama",
      "endpoint": "http://localhost:11434",
      "model": "qwen2.5:7b-instruct"
    }
  },
  "servers": {
    "time": {
      "command": "python",
      "args": ["mcp-servers/time/server.py"]
    },
    "weather": {
      "url": "http://localhost:5050/mcp"
    }
  }
}
```

### 🔹 `models`

Each model has:

- `provider`: `"openai"` or `"ollama"`
- `model`: the model name (e.g., `gpt-4.1`, `qwen2.5:7b-instruct`)
- `endpoint`: optional custom endpoint
  - For OpenAI: custom OpenAI-compatible backends (e.g., LM Studio at `http://localhost:1234/v1`)
  - For Ollama: defaults to `http://localhost:11434` if not specified
- `template`: optional Jinja2 template name for custom system prompt formatting (useful for models without native tool support)

### 🔹 `servers`

Servers can either be local (run over stdio) or remote.

#### Local Config:

- `command`: the command used to run the server, e.g. `python`, `npm`
- `args`: the arguments to pass to the server as a list, e.g. `["time/server.py"]`
- Optional: `env` to set environment variables for the subprocess, and `system_prompt` to override the server's prompt

#### Remote Config:

- `url`: the URL of the MCP server
- Optional: `transport`: the type of transport, one of `http`, `sse`, or `streamable-http`. Defaults to `http`.

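For example, a `servers` block using the optional fields might look like this (the values are illustrative):

```json
{
  "servers": {
    "time": {
      "command": "python",
      "args": ["mcp-servers/time/server.py"],
      "env": { "TZ": "UTC" },
      "system_prompt": "You help the user with dates and times."
    },
    "weather": {
      "url": "http://localhost:5050/mcp",
      "transport": "streamable-http"
    }
  }
}
```
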
## Environment Variables

- `OPENAI_API_KEY`: required when using the `openai` provider (can be any string when using local OpenAI-compatible APIs)
- `TOOL_RESULT_FORMAT`: adjusts the format of tool results returned to the LLM
  - Options: `result`, `function_result`, `function_args_result`
  - Default: `result`
- `MCP_TOOL_CACHE_TTL`: tool cache TTL in seconds (default: 30, set to 0 for indefinite caching)
- `LOG_LEVEL`: logging level (default: `INFO`)

You can set them using `export` or by creating a `.env` file.

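For example, a `.env` file in the project root might look like this (values shown are only illustrative):

```
OPENAI_API_KEY=your-key-here
TOOL_RESULT_FORMAT=function_result
MCP_TOOL_CACHE_TTL=60
LOG_LEVEL=DEBUG
```
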
## 🛠 CLI Reference

### `casual-mcp serve`

Start the API server.

**Options:**

- `--host`: Host to bind (default `0.0.0.0`)
- `--port`: Port to serve on (default `8000`)

### `casual-mcp servers`

Loads the config and outputs the list of MCP servers you have configured.

#### Example Output

```
$ casual-mcp servers
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━┓
┃ Name    ┃ Type   ┃ Command / Url                 ┃ Env ┃
┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━┩
│ math    │ local  │ mcp-servers/math/server.py    │     │
│ time    │ local  │ mcp-servers/time-v2/server.py │     │
│ weather │ local  │ mcp-servers/weather/server.py │     │
│ words   │ remote │ https://localhost:3000/mcp    │     │
└─────────┴────────┴───────────────────────────────┴─────┘
```

### `casual-mcp models`

Loads the config and outputs the list of models you have configured.

#### Example Output

```
$ casual-mcp models
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Name              ┃ Provider ┃ Model                     ┃ Endpoint               ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
│ lm-phi-4-mini     │ openai   │ phi-4-mini-instruct       │ http://kovacs:1234/v1  │
│ lm-hermes-3       │ openai   │ hermes-3-llama-3.2-3b     │ http://kovacs:1234/v1  │
│ lm-groq           │ openai   │ llama-3-groq-8b-tool-use  │ http://kovacs:1234/v1  │
│ gpt-4o-mini       │ openai   │ gpt-4o-mini               │                        │
│ gpt-4.1-nano      │ openai   │ gpt-4.1-nano              │                        │
│ gpt-4.1-mini      │ openai   │ gpt-4.1-mini              │                        │
│ gpt-4.1           │ openai   │ gpt-4.1                   │                        │
└───────────────────┴──────────┴───────────────────────────┴────────────────────────┘
```

## 🧠 Programmatic Usage

You can import and use the core framework in your own Python code.

### ✅ Exposed Interfaces

#### `McpToolChat`

Orchestrates LLM interaction with tools using a recursive loop.

Accepts any provider that implements the `LLMProvider` protocol from casual-llm. This means you can use casual-llm's built-in providers (OpenAI, Ollama) or create your own custom provider.

```python
from casual_llm import LLMProvider, SystemMessage, UserMessage
from casual_mcp import McpToolChat
from casual_mcp.tool_cache import ToolCache

# provider can be any object implementing the LLMProvider protocol
tool_cache = ToolCache(mcp_client)
chat = McpToolChat(mcp_client, provider, system_prompt, tool_cache=tool_cache)

# The generate method takes a user prompt
response = await chat.generate("What time is it in London?")

# The generate method with a session ID
response = await chat.generate("What time is it in London?", "my-session-id")

# The chat method takes a list of chat messages
# Note: the system prompt is ignored when a system message is included in the
# messages, so there is no need to set it on the constructor
chat = McpToolChat(mcp_client, provider, tool_cache=tool_cache)
messages = [
    SystemMessage(content="You are a cool dude who likes to help the user"),
    UserMessage(content="What time is it in London?")
]
response = await chat.chat(messages)

# Get usage statistics from the last call
stats = chat.get_stats()
if stats:
    print(f"Tokens used: {stats.tokens.total_tokens}")
    print(f"Tool calls: {stats.tool_calls.total}")
    print(f"LLM calls: {stats.llm_calls}")
```

#### Usage Statistics

After calling `chat()` or `generate()`, you can retrieve usage statistics via `get_stats()`:

```python
response = await chat.chat(messages)
stats = chat.get_stats()

# Token usage (accumulated across all LLM calls in the agentic loop)
stats.tokens.prompt_tokens      # Input tokens
stats.tokens.completion_tokens  # Output tokens
stats.tokens.total_tokens       # Total (computed)

# Tool call stats
stats.tool_calls.by_tool    # Dict of tool name -> call count, e.g. {"math_add": 2}
stats.tool_calls.by_server  # Dict of server name -> call count, e.g. {"math": 2}
stats.tool_calls.total      # Total tool calls (computed)

# LLM call count
stats.llm_calls  # Number of LLM calls made (1 = no tools, 2+ = tool loop)
```

Stats are reset at the start of each new `chat()` or `generate()` call. `get_stats()` returns `None` if no calls have been made yet.

#### `ProviderFactory`

Instantiates LLM providers (from casual-llm) based on the selected model config.

```python
from casual_mcp import ProviderFactory

provider_factory = ProviderFactory()
provider = provider_factory.get_provider("lm-qwen-3", model_config)
```

The factory returns an `LLMProvider` from casual-llm that can be used with `McpToolChat`.

> ℹ️ Tool catalogues are cached to avoid repeated `ListTools` calls. The cache refreshes every 30 seconds by default. Override this with the `MCP_TOOL_CACHE_TTL` environment variable (set to `0` or a negative value to cache indefinitely).

#### `load_config`

Loads your `casual_mcp_config.json` into a validated config object.

```python
from casual_mcp import load_config

config = load_config("casual_mcp_config.json")
```

#### `load_mcp_client`

Creates a multi-server FastMCP client from the config object.

```python
from casual_mcp import load_mcp_client

mcp_client = load_mcp_client(config)
```

#### Model and Server Configs

Exported from `casual_mcp.models`:

- `StdioServerConfig`
- `RemoteServerConfig`
- `OpenAIModelConfig`
- `OllamaModelConfig`
- `ChatStats`
- `TokenUsageStats`
- `ToolCallStats`

Use these types to build valid configs:

```python
from casual_mcp.models import OpenAIModelConfig, OllamaModelConfig, StdioServerConfig

openai_model = OpenAIModelConfig(provider="openai", model="gpt-4.1")
ollama_model = OllamaModelConfig(provider="ollama", model="qwen2.5:7b-instruct", endpoint="http://localhost:11434")
server = StdioServerConfig(command="python", args=["time/server.py"])
```

#### Chat Messages

Exported from `casual_llm` (re-exported from `casual_mcp.models` for backwards compatibility):

- `AssistantMessage`
- `SystemMessage`
- `ToolResultMessage`
- `UserMessage`
- `ChatMessage`

Use these types to build message chains:

```python
from casual_llm import SystemMessage, UserMessage

messages = [
    SystemMessage(content="You are a friendly tool calling assistant."),
    UserMessage(content="What is the time?")
]
```

### Example

```python
from casual_llm import SystemMessage, UserMessage
from casual_mcp import McpToolChat, ProviderFactory, load_config, load_mcp_client

model = "gpt-4.1-nano"
messages = [
    SystemMessage(content="""You are a tool calling assistant.
You have access to up-to-date information through the tools.
Respond naturally and confidently, as if you already know all the facts."""),
    UserMessage(content="Will I need to take my umbrella to London today?")
]

# Load the config from the file
config = load_config("casual_mcp_config.json")

# Set up the MCP client
mcp_client = load_mcp_client(config)

# Get the provider for the model
provider_factory = ProviderFactory()
provider = provider_factory.get_provider(model, config.models[model])

# Perform the chat and tool calling
chat = McpToolChat(mcp_client, provider)
response_messages = await chat.chat(messages)
```

## 🏗️ Architecture Overview

Casual MCP orchestrates a flow between LLMs and MCP tool servers:

1. **MCP Client** connects to multiple tool servers (local via stdio or remote via HTTP/SSE)
2. **Tool Cache** fetches and caches available tools from all connected servers
3. **Tool Conversion** converts MCP tools to casual-llm's `Tool` format automatically
4. **ProviderFactory** creates LLM providers from casual-llm based on model config
5. **McpToolChat** orchestrates the recursive loop:
   - Sends messages + tools to LLM provider
   - LLM returns response (potentially with tool calls)
   - Executes tool calls via MCP client
   - Feeds results back to LLM
   - Repeats until LLM provides final answer

```
┌─────────────┐      ┌──────────────┐      ┌────────────────┐
│ MCP Servers │─────▶│  Tool Cache  │─────▶│ Tool Converter │
└─────────────┘      └──────────────┘      └────────────────┘
                            │                       │
                            ▼                       ▼
                     ┌──────────────────────────────┐
                     │       McpToolChat Loop       │
                     │                              │
                     │  LLM ──▶ Tool Calls ──▶ MCP  │
                     │   ▲                      │   │
                     │   └────── Results ───────┘   │
                     └──────────────────────────────┘
```

### Tool Conversion

MCP tools are automatically converted from MCP's format to casual-llm's `Tool` format using the `convert_tools` module. This happens transparently in `McpToolChat.chat()` via `tools_from_mcp()`.

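If you want to inspect the converted tool list yourself, a minimal sketch might look like the following; the exact `tools_from_mcp` signature and the attributes of the returned objects are assumptions based on the description above:

```python
import asyncio

from casual_mcp import load_config, load_mcp_client
from casual_mcp.convert_tools import tools_from_mcp  # assumed import path


async def show_tools() -> None:
    config = load_config("casual_mcp_config.json")
    mcp_client = load_mcp_client(config)

    # FastMCP clients are used as async context managers
    async with mcp_client:
        mcp_tools = await mcp_client.list_tools()  # raw MCP tool definitions

    # Assumption: tools_from_mcp takes the MCP tool list and returns casual-llm Tool objects
    llm_tools = tools_from_mcp(mcp_tools)
    for tool in llm_tools:
        print(tool.name)  # assumption: the converted tools expose a name attribute


asyncio.run(show_tools())
```
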
## 📊 Response Structure

The `chat()` and `generate()` methods return a list of `ChatMessage` objects (from casual-llm):

```python
response_messages = await chat.chat(messages)
# Returns: list[ChatMessage]
# Each message can be:
# - AssistantMessage: LLM's response (content + optional tool_calls)
# - ToolResultMessage: Result from tool execution

# Access the final response:
final_answer = response_messages[-1].content

# Check for tool calls in any message:
for msg in response_messages:
    if hasattr(msg, 'tool_calls') and msg.tool_calls:
        # Message contains tool calls
        for tool_call in msg.tool_calls:
            print(f"Called: {tool_call.function.name}")
```

## 💡 Common Patterns

### Using Templates for Models Without Native Tool Support

Some models don't natively support tool calling. Use Jinja2 templates to format tools in the system prompt:

```json
{
  "models": {
    "custom-model": {
      "provider": "ollama",
      "model": "some-model:7b",
      "template": "custom-tool-format"
    }
  }
}
```

Create `prompt-templates/custom-tool-format.j2`:

```jinja2
You are a helpful assistant with access to these tools:

{% for tool in tools %}
- {{ tool.name }}: {{ tool.description }}
  Parameters: {{ tool.inputSchema.properties | tojson }}
{% endfor %}

To use a tool, respond with JSON: {"tool": "tool_name", "args": {...}}
```

### Formatting Tool Results

Control how tool results are presented to the LLM using `TOOL_RESULT_FORMAT`:

```bash
# Just the raw result
export TOOL_RESULT_FORMAT=result

# Function name → result
export TOOL_RESULT_FORMAT=function_result
# Example: "get_weather → Temperature: 72°F"

# Function with args → result
export TOOL_RESULT_FORMAT=function_args_result
# Example: "get_weather(location='London') → Temperature: 15°C"
```

### Session Management

**Important**: Sessions are for testing/development only. In production, manage sessions in your own application.

Sessions are stored in-memory and cleared on server restart:

```python
# Using sessions for development/testing
response = await chat.generate("What's the weather?", session_id="test-123")
response = await chat.generate("How about tomorrow?", session_id="test-123")

# For production: manage your own message history
messages = []
messages.append(UserMessage(content="What's the weather?"))
response_msgs = await chat.chat(messages)
messages.extend(response_msgs)

# Next turn
messages.append(UserMessage(content="How about tomorrow?"))
response_msgs = await chat.chat(messages)
```

## 🔧 Troubleshooting

### Tool Not Found

If you see errors about tools not being found:

1. **Check MCP servers are running**: `casual-mcp servers`
2. **List available tools**: `casual-mcp tools`
3. **Check tool cache TTL**: Tools are cached for 30 seconds by default. Wait or restart if you just added a server.
4. **Verify server config**: Ensure `command`, `args`, or `url` are correct in your config

### Provider Initialization Issues

**OpenAI Provider:**

```bash
# Ensure API key is set (even for local APIs)
export OPENAI_API_KEY=your-key-here

# For local OpenAI-compatible APIs (LM Studio, etc):
export OPENAI_API_KEY=dummy-key  # Can be any string
```

**Ollama Provider:**

```bash
# Check Ollama is running
curl http://localhost:11434/api/version

# Ensure model is pulled
ollama pull qwen2.5:7b-instruct
```

### Cache Refresh Behavior

Tools are cached with a 30-second TTL by default. If you add/remove MCP servers:

- **Option 1**: Wait 30 seconds for automatic refresh
- **Option 2**: Restart the application
- **Option 3**: Set `MCP_TOOL_CACHE_TTL=0` for indefinite caching (refresh only on restart)
- **Option 4**: Set a shorter TTL like `MCP_TOOL_CACHE_TTL=5` for 5-second refresh

### Common Configuration Errors

```json
// ❌ Missing required fields
{
  "models": {
    "my-model": {
      "provider": "openai"
      // Missing "model" field!
    }
  }
}

// ✅ Correct
{
  "models": {
    "my-model": {
      "provider": "openai",
      "model": "gpt-4.1"
    }
  }
}

// ❌ Invalid provider
{
  "models": {
    "my-model": {
      "provider": "anthropic",  // Not supported!
      "model": "claude-3"
    }
  }
}

// ✅ Supported providers
{
  "models": {
    "openai-model": {
      "provider": "openai",
      "model": "gpt-4.1"
    },
    "ollama-model": {
      "provider": "ollama",
      "model": "qwen2.5:7b"
    }
  }
}
```

## 🚀 API Usage

### Start the API Server

```bash
casual-mcp serve --host 0.0.0.0 --port 8000
```

### Chat

#### Endpoint: `POST /chat`

#### Request Body:

- `model`: the LLM model to use
- `messages`: a list of chat messages (system, assistant, user, etc.) to pass to the API, allowing you to keep your own chat session in the client calling the API
- `include_stats`: (optional, default: `false`) include usage statistics in the response

#### Example:

```json
{
  "model": "gpt-4.1-nano",
  "messages": [
    {
      "role": "user",
      "content": "can you explain what the word consistent means?"
    }
  ],
  "include_stats": true
}
```

#### Response with stats:

```json
{
  "messages": [...],
  "response": "Consistent means...",
  "stats": {
    "tokens": {
      "prompt_tokens": 150,
      "completion_tokens": 75,
      "total_tokens": 225
    },
    "tool_calls": {
      "by_tool": {"words_define": 1},
      "by_server": {"words": 1},
      "total": 1
    },
    "llm_calls": 2
  }
}
```

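As a sketch, the `/chat` endpoint can be called from Python with `requests` (already a dependency); the base URL below assumes a local `casual-mcp serve` on the default port:

```python
import requests

payload = {
    "model": "gpt-4.1-nano",
    "messages": [
        {"role": "user", "content": "can you explain what the word consistent means?"}
    ],
    "include_stats": True,
}

# Assumes `casual-mcp serve` is running on localhost:8000
resp = requests.post("http://localhost:8000/chat", json=payload, timeout=60)
resp.raise_for_status()
data = resp.json()
print(data["response"])
```
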
### Generate

The generate endpoint allows you to send a user prompt as a string.

It also supports sessions, which keep a record of all messages in the session and feed them back to the LLM for context. Sessions are stored in memory, so they are cleared when the server is restarted.

#### Endpoint: `POST /generate`

#### Request Body:

- `model`: the LLM model to use
- `prompt`: the user prompt
- `session_id`: an optional ID that stores all the messages from the session and provides them back to the LLM for context
- `include_stats`: (optional, default: `false`) include usage statistics in the response

#### Example:

```json
{
  "session_id": "my-session",
  "model": "gpt-4o-mini",
  "prompt": "can you explain what the word consistent means?",
  "include_stats": true
}
```

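For example, two consecutive calls sharing a `session_id` might look like this (a sketch; the base URL assumes a local server on the default port, and the response is printed as-is):

```python
import requests

base_url = "http://localhost:8000"  # assumes a local `casual-mcp serve`

for prompt in [
    "can you explain what the word consistent means?",
    "can you use it in a sentence?",
]:
    resp = requests.post(
        f"{base_url}/generate",
        json={"session_id": "my-session", "model": "gpt-4o-mini", "prompt": prompt},
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json())
```
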
### Get Session

Gets all the messages from a session.

#### Endpoint: `GET /generate/session/{session_id}`

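A matching sketch for retrieving the messages stored for the session used above (again assuming a local server on the default port):

```python
import requests

resp = requests.get("http://localhost:8000/generate/session/my-session", timeout=30)
resp.raise_for_status()
print(resp.json())  # the messages recorded for this session
```
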
## License

This software is released under the [MIT License](LICENSE).