casual-mcp 0.3.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. casual_mcp-0.6.0/PKG-INFO +691 -0
  2. casual_mcp-0.6.0/README.md +667 -0
  3. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/pyproject.toml +25 -21
  4. casual_mcp-0.6.0/src/casual_mcp/__init__.py +24 -0
  5. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/cli.py +24 -24
  6. casual_mcp-0.6.0/src/casual_mcp/convert_tools.py +68 -0
  7. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/logging.py +6 -2
  8. casual_mcp-0.6.0/src/casual_mcp/main.py +120 -0
  9. casual_mcp-0.6.0/src/casual_mcp/mcp_tool_chat.py +232 -0
  10. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/__init__.py +21 -8
  11. casual_mcp-0.6.0/src/casual_mcp/models/chat_stats.py +37 -0
  12. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/config.py +2 -2
  13. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/generation_error.py +1 -1
  14. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/model_config.py +3 -3
  15. casual_mcp-0.6.0/src/casual_mcp/provider_factory.py +47 -0
  16. casual_mcp-0.6.0/src/casual_mcp/tool_cache.py +114 -0
  17. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/utils.py +18 -11
  18. casual_mcp-0.6.0/src/casual_mcp.egg-info/PKG-INFO +691 -0
  19. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/SOURCES.txt +11 -7
  20. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/requires.txt +3 -10
  21. casual_mcp-0.6.0/tests/test_chat_stats.py +118 -0
  22. casual_mcp-0.6.0/tests/test_mcp_tool_chat.py +499 -0
  23. casual_mcp-0.6.0/tests/test_provider_factory.py +106 -0
  24. casual_mcp-0.6.0/tests/test_session_management.py +57 -0
  25. casual_mcp-0.6.0/tests/test_tool_cache.py +173 -0
  26. casual_mcp-0.6.0/tests/test_tools.py +114 -0
  27. casual_mcp-0.6.0/tests/test_utils.py +130 -0
  28. casual_mcp-0.3.1/PKG-INFO +0 -398
  29. casual_mcp-0.3.1/README.md +0 -368
  30. casual_mcp-0.3.1/src/casual_mcp/__init__.py +0 -13
  31. casual_mcp-0.3.1/src/casual_mcp/main.py +0 -119
  32. casual_mcp-0.3.1/src/casual_mcp/mcp_tool_chat.py +0 -154
  33. casual_mcp-0.3.1/src/casual_mcp/models/messages.py +0 -31
  34. casual_mcp-0.3.1/src/casual_mcp/models/tool_call.py +0 -14
  35. casual_mcp-0.3.1/src/casual_mcp/providers/__init__.py +0 -0
  36. casual_mcp-0.3.1/src/casual_mcp/providers/abstract_provider.py +0 -15
  37. casual_mcp-0.3.1/src/casual_mcp/providers/ollama_provider.py +0 -72
  38. casual_mcp-0.3.1/src/casual_mcp/providers/openai_provider.py +0 -178
  39. casual_mcp-0.3.1/src/casual_mcp/providers/provider_factory.py +0 -56
  40. casual_mcp-0.3.1/src/casual_mcp.egg-info/PKG-INFO +0 -398
  41. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/LICENSE +0 -0
  42. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/setup.cfg +0 -0
  43. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp/models/mcp_server_config.py +0 -0
  44. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/dependency_links.txt +0 -0
  45. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/entry_points.txt +0 -0
  46. {casual_mcp-0.3.1 → casual_mcp-0.6.0}/src/casual_mcp.egg-info/top_level.txt +0 -0
@@ -0,0 +1,691 @@
1
+ Metadata-Version: 2.4
2
+ Name: casual-mcp
3
+ Version: 0.6.0
4
+ Summary: Multi-server MCP client for LLM tool orchestration
5
+ Author: Alex Stansfield
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/casualgenius/casual-mcp
8
+ Project-URL: Repository, https://github.com/casualgenius/casual-mcp
9
+ Project-URL: Issue Tracker, https://github.com/casualgenius/casual-mcp/issues
10
+ Requires-Python: >=3.10
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: casual-llm[openai]>=0.4.3
14
+ Requires-Dist: dateparser>=1.2.1
15
+ Requires-Dist: fastapi>=0.115.12
16
+ Requires-Dist: fastmcp>=2.12.4
17
+ Requires-Dist: jinja2>=3.1.6
18
+ Requires-Dist: python-dotenv>=1.1.0
19
+ Requires-Dist: requests>=2.32.3
20
+ Requires-Dist: rich>=14.0.0
21
+ Requires-Dist: typer>=0.19.2
22
+ Requires-Dist: uvicorn>=0.34.2
23
+ Dynamic: license-file
24
+
25
+ # 🧠 Casual MCP
26
+
27
+ ![PyPI](https://img.shields.io/pypi/v/casual-mcp)
28
+ ![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)
29
+
30
+ **Casual MCP** is a Python framework for building, evaluating, and serving LLMs with tool-calling capabilities using [Model Context Protocol (MCP)](https://modelcontextprotocol.io).
31
+ It includes:
32
+
33
+ - ✅ A multi-server MCP client using [FastMCP](https://github.com/jlowin/fastmcp)
34
+ - ✅ Provider support for OpenAI and Ollama (powered by [casual-llm](https://github.com/AlexStansfield/casual-llm))
35
+ - ✅ A recursive tool-calling chat loop
36
+ - ✅ Usage statistics tracking (tokens, tool calls, LLM calls)
37
+ - ✅ System prompt templating with Jinja2
38
+ - ✅ A basic API exposing a chat endpoint
39
+
40
+ ## ✨ Features
41
+
42
+ - Plug-and-play multi-server tool orchestration
43
+ - OpenAI and Ollama LLM providers (via casual-llm)
44
+ - Usage statistics tracking (tokens, tool calls, LLM calls)
45
+ - Prompt templating with Jinja2
46
+ - Configurable via JSON
47
+ - CLI and API access
48
+ - Extensible architecture
49
+
50
+ ## 🔧 Installation
51
+
52
+ ### Uv
53
+
54
+ ```bash
55
+ uv add casual-mcp
56
+ ```
57
+
58
+ ### Pip
59
+
60
+ ```bash
61
+ pip install casual-mcp
62
+ ```
63
+
64
+ Or for development:
65
+
66
+ ```bash
67
+ git clone https://github.com/casualgenius/casual-mcp.git
68
+ cd casual-mcp
69
+ uv sync --group dev
70
+ ```
71
+
72
+ ## 🧩 System Prompt Templates
73
+
74
+ System prompts are defined as [Jinja2](https://jinja.palletsprojects.com) templates in the `prompt-templates/` directory.
75
+
76
+ They are referenced by name in the config file to specify which system prompt to use for each model.
77
+
78
+ This allows you to define custom prompts for each model — useful when using models that do not natively support tools. Templates are passed the tool list in the `tools` variable.
79
+
80
+ ```jinja2
81
+ # prompt-templates/example_prompt.j2
82
+ Here is a list of functions in JSON format that you can invoke:
83
+ [
84
+ {% for tool in tools %}
85
+ {
86
+ "name": "{{ tool.name }}",
87
+ "description": "{{ tool.description }}",
88
+ "parameters": {
89
+ {% for param_name, param in tool.inputSchema.items() %}
90
+ "{{ param_name }}": {
91
+ "description": "{{ param.description }}",
92
+ "type": "{{ param.type }}"{% if param.default is defined %},
93
+ "default": "{{ param.default }}"{% endif %}
94
+ }{% if not loop.last %},{% endif %}
95
+ {% endfor %}
96
+ }
97
+ }{% if not loop.last %},{% endif %}
98
+ {% endfor %}
99
+ ]
100
+ ```
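+
+ To apply a template, reference its file name (without the `.j2` extension) in the model config. A minimal sketch reusing the template above:
+
+ ```json
+ {
+   "models": {
+     "my-model": {
+       "provider": "ollama",
+       "model": "qwen2.5:7b-instruct",
+       "template": "example_prompt"
+     }
+   }
+ }
+ ```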
101
+
102
+ ## ⚙️ Configuration File (`casual_mcp_config.json`)
103
+
104
+ 📄 See the [Programmatic Usage](#-programmatic-usage) section to build configs and messages with typed models.
105
+
106
+ The CLI and API can be configured using a `casual_mcp_config.json` file that defines:
107
+
108
+ - 🔧 Available **models** and their providers
109
+ - 🧰 Available **MCP tool servers**
110
+ - 🧩 Optional tool namespacing behavior
111
+
112
+ ### 🔸 Example
113
+
114
+ ```json
115
+ {
116
+ "models": {
117
+ "gpt-4.1": {
118
+ "provider": "openai",
119
+ "model": "gpt-4.1"
120
+ },
121
+ "lm-qwen-3": {
122
+ "provider": "openai",
123
+ "endpoint": "http://localhost:1234/v1",
124
+ "model": "qwen3-8b",
125
+ "template": "lm-studio-native-tools"
126
+ },
127
+ "ollama-qwen": {
128
+ "provider": "ollama",
129
+ "endpoint": "http://localhost:11434",
130
+ "model": "qwen2.5:7b-instruct"
131
+ }
132
+ },
133
+ "servers": {
134
+ "time": {
135
+ "command": "python",
136
+ "args": ["mcp-servers/time/server.py"]
137
+ },
138
+ "weather": {
139
+ "url": "http://localhost:5050/mcp"
140
+ }
141
+ }
142
+ }
143
+ ```
144
+
145
+ ### 🔹 `models`
146
+
147
+ Each model has:
148
+
149
+ - `provider`: `"openai"` or `"ollama"`
150
+ - `model`: the model name (e.g., `gpt-4.1`, `qwen2.5:7b-instruct`)
151
+ - `endpoint`: optional custom endpoint
152
+ - For OpenAI: custom OpenAI-compatible backends (e.g., LM Studio at `http://localhost:1234/v1`)
153
+ - For Ollama: defaults to `http://localhost:11434` if not specified
154
+ - `template`: optional Jinja2 template name for custom system prompt formatting (useful for models without native tool support)
155
+
156
+ ### 🔹 `servers`
157
+
158
+ Servers can either be local (over stdio) or remote.
159
+
160
+ #### Local Config:
161
+ - `command`: the command to run the server, e.g. `python`, `npm`
162
+ - `args`: the arguments to pass to the server as a list, e.g. `["time/server.py"]`
163
+ - Optional: `env` to set environment variables for the subprocess, `system_prompt` to override the server's system prompt
164
+
165
+ #### Remote Config:
166
+ - `url`: the URL of the MCP server
167
+ - Optional: `transport`: the transport type, one of `http`, `sse`, or `streamable-http`. Defaults to `http` (see the example below)
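+
+ A config sketch showing these optional fields (values are illustrative):
+
+ ```json
+ {
+   "servers": {
+     "time": {
+       "command": "python",
+       "args": ["mcp-servers/time/server.py"],
+       "env": { "TZ": "UTC" },
+       "system_prompt": "You answer questions about dates and times."
+     },
+     "weather": {
+       "url": "http://localhost:5050/mcp",
+       "transport": "sse"
+     }
+   }
+ }
+ ```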
168
+
169
+ ## Environment Variables
170
+
171
+ - `OPENAI_API_KEY`: required when using the `openai` provider (can be any string when using local OpenAI-compatible APIs)
172
+ - `TOOL_RESULT_FORMAT`: adjusts the format of tool results returned to the LLM
173
+ - Options: `result`, `function_result`, `function_args_result`
174
+ - Default: `result`
175
+ - `MCP_TOOL_CACHE_TTL`: tool cache TTL in seconds (default: 30, set to 0 for indefinite caching)
176
+ - `LOG_LEVEL`: logging level (default: `INFO`)
177
+
178
+ You can set them using `export` or by creating a `.env` file.
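+
+ For example, a `.env` file might look like this (values are illustrative):
+
+ ```bash
+ # .env
+ OPENAI_API_KEY=sk-xxxx            # any string works for local OpenAI-compatible APIs
+ TOOL_RESULT_FORMAT=function_result
+ MCP_TOOL_CACHE_TTL=60
+ LOG_LEVEL=DEBUG
+ ```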
179
+
180
+ ## 🛠 CLI Reference
181
+
182
+ ### `casual-mcp serve`
183
+ Start the API server.
184
+
185
+ **Options:**
186
+ - `--host`: Host to bind (default `0.0.0.0`)
187
+ - `--port`: Port to serve on (default `8000`)
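+
+ For example:
+
+ ```bash
+ casual-mcp serve --host 127.0.0.1 --port 8080
+ ```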
188
+
189
+ ### `casual-mcp servers`
190
+ Loads the config and outputs the list of MCP servers you have configured.
191
+
192
+ #### Example Output
193
+ ```
194
+ $ casual-mcp servers
195
+ ┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━┓
196
+ ┃ Name    ┃ Type   ┃ Command / Url                 ┃ Env ┃
197
+ ┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━┩
198
+ │ math    │ local  │ mcp-servers/math/server.py    │     │
199
+ │ time    │ local  │ mcp-servers/time-v2/server.py │     │
200
+ │ weather │ local  │ mcp-servers/weather/server.py │     │
201
+ │ words   │ remote │ https://localhost:3000/mcp    │     │
202
+ └─────────┴────────┴───────────────────────────────┴─────┘
203
+ ```
204
+
205
+ ### `casual-mcp models`
206
+ Loads the config and outputs the list of models you have configured.
207
+
208
+ #### Example Output
209
+ ```
210
+ $ casual-mcp models
211
+ ┏━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┓
212
+ ┃ Name          ┃ Provider ┃ Model                    ┃ Endpoint              ┃
213
+ ┡━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━┩
214
+ │ lm-phi-4-mini │ openai   │ phi-4-mini-instruct      │ http://kovacs:1234/v1 │
215
+ │ lm-hermes-3   │ openai   │ hermes-3-llama-3.2-3b    │ http://kovacs:1234/v1 │
216
+ │ lm-groq       │ openai   │ llama-3-groq-8b-tool-use │ http://kovacs:1234/v1 │
217
+ │ gpt-4o-mini   │ openai   │ gpt-4o-mini              │                       │
218
+ │ gpt-4.1-nano  │ openai   │ gpt-4.1-nano             │                       │
219
+ │ gpt-4.1-mini  │ openai   │ gpt-4.1-mini             │                       │
220
+ │ gpt-4.1       │ openai   │ gpt-4.1                  │                       │
221
+ └───────────────┴──────────┴──────────────────────────┴───────────────────────┘
222
+ ```
223
+
224
+ ## 🧠 Programmatic Usage
225
+
226
+ You can import and use the core framework in your own Python code.
227
+
228
+ ### ✅ Exposed Interfaces
229
+
230
+ #### `McpToolChat`
231
+ Orchestrates LLM interaction with tools using a recursive loop.
232
+
233
+ Accepts any provider that implements the `LLMProvider` protocol from casual-llm. This means you can use casual-llm's built-in providers (OpenAI, Ollama) or create your own custom provider.
234
+
235
+ ```python
236
+ from casual_llm import LLMProvider, SystemMessage, UserMessage
237
+ from casual_mcp import McpToolChat
238
+ from casual_mcp.tool_cache import ToolCache
239
+
240
+ # provider can be any object implementing the LLMProvider protocol
241
+ tool_cache = ToolCache(mcp_client)
242
+ chat = McpToolChat(mcp_client, provider, system_prompt, tool_cache=tool_cache)
243
+
244
+ # The generate method takes a user prompt
245
+ response = await chat.generate("What time is it in London?")
246
+
247
+ # Generate method with session
248
+ response = await chat.generate("What time is it in London?", "my-session-id")
249
+
250
+ # The chat method takes a list of chat messages
251
+ # note: the system prompt is ignored if a system message is included in messages, so it is not set here
252
+ chat = McpToolChat(mcp_client, provider, tool_cache=tool_cache)
253
+ messages = [
254
+ SystemMessage(content="You are a cool dude who likes to help the user"),
255
+ UserMessage(content="What time is it in London?")
256
+ ]
257
+ response = await chat.chat(messages)
258
+
259
+ # Get usage statistics from the last call
260
+ stats = chat.get_stats()
261
+ if stats:
262
+     print(f"Tokens used: {stats.tokens.total_tokens}")
263
+     print(f"Tool calls: {stats.tool_calls.total}")
264
+     print(f"LLM calls: {stats.llm_calls}")
265
+ ```
266
+
267
+ #### Usage Statistics
268
+
269
+ After calling `chat()` or `generate()`, you can retrieve usage statistics via `get_stats()`:
270
+
271
+ ```python
272
+ response = await chat.chat(messages)
273
+ stats = chat.get_stats()
274
+
275
+ # Token usage (accumulated across all LLM calls in the agentic loop)
276
+ stats.tokens.prompt_tokens # Input tokens
277
+ stats.tokens.completion_tokens # Output tokens
278
+ stats.tokens.total_tokens # Total (computed)
279
+
280
+ # Tool call stats
281
+ stats.tool_calls.by_tool # Dict of tool name -> call count, e.g. {"math_add": 2}
282
+ stats.tool_calls.by_server # Dict of server name -> call count, e.g. {"math": 2}
283
+ stats.tool_calls.total # Total tool calls (computed)
284
+
285
+ # LLM call count
286
+ stats.llm_calls # Number of LLM calls made (1 = no tools, 2+ = tool loop)
287
+ ```
288
+
289
+ Stats are reset at the start of each new `chat()` or `generate()` call. `get_stats()` returns `None` if no calls have been made yet.
290
+
291
+ #### `ProviderFactory`
292
+ Instantiates LLM providers (from casual-llm) based on the selected model config.
293
+
294
+ ```python
295
+ from casual_mcp import ProviderFactory
296
+
297
+ provider_factory = ProviderFactory()
298
+ provider = provider_factory.get_provider("lm-qwen-3", model_config)
299
+ ```
300
+
301
+ The factory returns an `LLMProvider` from casual-llm that can be used with `McpToolChat`.
302
+
303
+ > ℹ️ Tool catalogues are cached to avoid repeated `ListTools` calls. The cache refreshes every 30 seconds by default. Override this with the `MCP_TOOL_CACHE_TTL` environment variable (set to `0` or a negative value to cache indefinitely).
304
+
305
+ #### `load_config`
306
+ Loads your `casual_mcp_config.json` into a validated config object.
307
+
308
+ ```python
309
+ from casual_mcp import load_config
310
+
311
+ config = load_config("casual_mcp_config.json")
312
+ ```
313
+
314
+ #### `load_mcp_client`
315
+ Creates a multi-server FastMCP client from the config object.
316
+
317
+ ```python
318
+ from casual_mcp import load_mcp_client
319
+
320
+ mcp_client = load_mcp_client(config)
321
+ ```
322
+
323
+ #### Model and Server Configs
324
+
325
+ Exported from `casual_mcp.models`:
326
+ - `StdioServerConfig`
327
+ - `RemoteServerConfig`
328
+ - `OpenAIModelConfig`
329
+ - `OllamaModelConfig`
330
+ - `ChatStats`
331
+ - `TokenUsageStats`
332
+ - `ToolCallStats`
333
+
334
+ Use these types to build valid configs:
335
+
336
+ ```python
337
+ from casual_mcp.models import OpenAIModelConfig, OllamaModelConfig, StdioServerConfig
338
+
339
+ openai_model = OpenAIModelConfig(provider="openai", model="gpt-4.1")
340
+ ollama_model = OllamaModelConfig(provider="ollama", model="qwen2.5:7b-instruct", endpoint="http://localhost:11434")
341
+ server = StdioServerConfig(command="python", args=["time/server.py"])
342
+ ```
343
+
344
+ #### Chat Messages
345
+
346
+ Exported from `casual_llm` (re-exported from `casual_mcp.models` for backwards compatibility):
347
+ - `AssistantMessage`
348
+ - `SystemMessage`
349
+ - `ToolResultMessage`
350
+ - `UserMessage`
351
+ - `ChatMessage`
352
+
353
+ Use these types to build message chains:
354
+
355
+ ```python
356
+ from casual_llm import SystemMessage, UserMessage
357
+
358
+ messages = [
359
+ SystemMessage(content="You are a friendly tool calling assistant."),
360
+ UserMessage(content="What is the time?")
361
+ ]
362
+ ```
363
+
364
+ ### Example
365
+
366
+ ```python
367
+ from casual_llm import SystemMessage, UserMessage
368
+ from casual_mcp import McpToolChat, ProviderFactory, load_config, load_mcp_client
369
+
370
+ model = "gpt-4.1-nano"
371
+ messages = [
372
+ SystemMessage(content="""You are a tool calling assistant.
373
+ You have access to up-to-date information through the tools.
374
+ Respond naturally and confidently, as if you already know all the facts."""),
375
+ UserMessage(content="Will I need to take my umbrella to London today?")
376
+ ]
377
+
378
+ # Load the Config from the File
379
+ config = load_config("casual_mcp_config.json")
380
+
381
+ # Setup the MCP Client
382
+ mcp_client = load_mcp_client(config)
383
+
384
+ # Get the Provider for the Model
385
+ provider_factory = ProviderFactory()
386
+ provider = provider_factory.get_provider(model, config.models[model])
387
+
388
+ # Perform the Chat and Tool calling
389
+ chat = McpToolChat(mcp_client, provider)
390
+ response_messages = await chat.chat(messages)
391
+ ```
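+
+ To inspect the result, you can print the final answer and the accumulated usage statistics (a short follow-up using the interfaces described above):
+
+ ```python
+ # The last message in the list is the assistant's final answer
+ print(response_messages[-1].content)
+
+ # Usage statistics accumulated across the tool-calling loop
+ stats = chat.get_stats()
+ if stats:
+     print(f"LLM calls: {stats.llm_calls}, tool calls: {stats.tool_calls.total}, tokens: {stats.tokens.total_tokens}")
+ ```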
392
+
393
+ ## 🏗️ Architecture Overview
394
+
395
+ Casual MCP orchestrates a flow between LLMs and MCP tool servers:
396
+
397
+ 1. **MCP Client** connects to multiple tool servers (local via stdio or remote via HTTP/SSE)
398
+ 2. **Tool Cache** fetches and caches available tools from all connected servers
399
+ 3. **Tool Conversion** converts MCP tools to casual-llm's `Tool` format automatically
400
+ 4. **ProviderFactory** creates LLM providers from casual-llm based on model config
401
+ 5. **McpToolChat** orchestrates the recursive loop:
402
+ - Sends messages + tools to LLM provider
403
+ - LLM returns response (potentially with tool calls)
404
+ - Executes tool calls via MCP client
405
+ - Feeds results back to LLM
406
+ - Repeats until LLM provides final answer
407
+
408
+ ```
409
+ ┌─────────────┐      ┌──────────────┐      ┌────────────────┐
410
+ │ MCP Servers │─────▶│  Tool Cache  │─────▶│ Tool Converter │
411
+ └─────────────┘      └──────────────┘      └────────────────┘
412
+                              │                     │
413
+                              ▼                     ▼
414
+                      ┌──────────────────────────────┐
415
+                      │       McpToolChat Loop       │
416
+                      │                              │
417
+                      │  LLM ──▶ Tool Calls ──▶ MCP  │
418
+                      │   ▲                      │   │
419
+                      │   └───────── Results ────┘   │
420
+                      └──────────────────────────────┘
421
+ ```
422
+
423
+ ### Tool Conversion
424
+
425
+ MCP tools are automatically converted from MCP's format to casual-llm's `Tool` format using the `convert_tools` module. This happens transparently in `McpToolChat.chat()` via `tools_from_mcp()`.
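+
+ If you need the converted tools outside the chat loop, a minimal sketch might look like the following. The import path and the cache accessor are assumptions based on the description above, not a documented API:
+
+ ```python
+ from casual_mcp.convert_tools import tools_from_mcp  # assumed import path
+ from casual_mcp.tool_cache import ToolCache
+
+ # assumes an already-configured FastMCP client (see load_mcp_client above)
+ tool_cache = ToolCache(mcp_client)
+ mcp_tools = await tool_cache.get_tools()  # hypothetical accessor for the cached MCP tools
+ llm_tools = tools_from_mcp(mcp_tools)     # casual-llm Tool objects ready to pass to a provider
+ ```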
426
+
427
+ ## 📊 Response Structure
428
+
429
+ The `chat()` and `generate()` methods return a list of `ChatMessage` objects (from casual-llm):
430
+
431
+ ```python
432
+ response_messages = await chat.chat(messages)
433
+ # Returns: list[ChatMessage]
434
+ # Each message can be:
435
+ # - AssistantMessage: LLM's response (content + optional tool_calls)
436
+ # - ToolResultMessage: Result from tool execution
437
+
438
+ # Access the final response:
439
+ final_answer = response_messages[-1].content
440
+
441
+ # Check for tool calls in any message:
442
+ for msg in response_messages:
443
+     if hasattr(msg, 'tool_calls') and msg.tool_calls:
444
+         # Message contains tool calls
445
+         for tool_call in msg.tool_calls:
446
+             print(f"Called: {tool_call.function.name}")
447
+ ```
448
+
449
+ ## 💡 Common Patterns
450
+
451
+ ### Using Templates for Models Without Native Tool Support
452
+
453
+ Some models don't natively support tool calling. Use Jinja2 templates to format tools in the system prompt:
454
+
455
+ ```json
456
+ {
457
+ "models": {
458
+ "custom-model": {
459
+ "provider": "ollama",
460
+ "model": "some-model:7b",
461
+ "template": "custom-tool-format"
462
+ }
463
+ }
464
+ }
465
+ ```
466
+
467
+ Create `prompt-templates/custom-tool-format.j2`:
468
+ ```jinja2
469
+ You are a helpful assistant with access to these tools:
470
+
471
+ {% for tool in tools %}
472
+ - {{ tool.name }}: {{ tool.description }}
473
+ Parameters: {{ tool.inputSchema.properties | tojson }}
474
+ {% endfor %}
475
+
476
+ To use a tool, respond with JSON: {"tool": "tool_name", "args": {...}}
477
+ ```
478
+
479
+ ### Formatting Tool Results
480
+
481
+ Control how tool results are presented to the LLM using `TOOL_RESULT_FORMAT`:
482
+
483
+ ```bash
484
+ # Just the raw result
485
+ export TOOL_RESULT_FORMAT=result
486
+
487
+ # Function name → result
488
+ export TOOL_RESULT_FORMAT=function_result
489
+ # Example: "get_weather → Temperature: 72°F"
490
+
491
+ # Function with args → result
492
+ export TOOL_RESULT_FORMAT=function_args_result
493
+ # Example: "get_weather(location='London') → Temperature: 15°C"
494
+ ```
495
+
496
+ ### Session Management
497
+
498
+ **Important**: Sessions are for testing/development only. In production, manage sessions in your own application.
499
+
500
+ Sessions are stored in-memory and cleared on server restart:
501
+
502
+ ```python
503
+ # Using sessions for development/testing
504
+ response = await chat.generate("What's the weather?", session_id="test-123")
505
+ response = await chat.generate("How about tomorrow?", session_id="test-123")
506
+
507
+ # For production: manage your own message history
508
+ messages = []
509
+ messages.append(UserMessage(content="What's the weather?"))
510
+ response_msgs = await chat.chat(messages)
511
+ messages.extend(response_msgs)
512
+
513
+ # Next turn
514
+ messages.append(UserMessage(content="How about tomorrow?"))
515
+ response_msgs = await chat.chat(messages)
516
+ ```
517
+
518
+ ## 🔧 Troubleshooting
519
+
520
+ ### Tool Not Found
521
+
522
+ If you see errors about tools not being found:
523
+
524
+ 1. **Check MCP servers are running**: `casual-mcp servers`
525
+ 2. **List available tools**: `casual-mcp tools`
526
+ 3. **Check tool cache TTL**: Tools are cached for 30 seconds by default. Wait or restart if you just added a server.
527
+ 4. **Verify server config**: Ensure `command`, `args`, or `url` are correct in your config
528
+
529
+ ### Provider Initialization Issues
530
+
531
+ **OpenAI Provider:**
532
+ ```bash
533
+ # Ensure API key is set (even for local APIs)
534
+ export OPENAI_API_KEY=your-key-here
535
+
536
+ # For local OpenAI-compatible APIs (LM Studio, etc):
537
+ export OPENAI_API_KEY=dummy-key # Can be any string
538
+ ```
539
+
540
+ **Ollama Provider:**
541
+ ```bash
542
+ # Check Ollama is running
543
+ curl http://localhost:11434/api/version
544
+
545
+ # Ensure model is pulled
546
+ ollama pull qwen2.5:7b-instruct
547
+ ```
548
+
549
+ ### Cache Refresh Behavior
550
+
551
+ Tools are cached with a 30-second TTL by default. If you add/remove MCP servers:
552
+
553
+ - **Option 1**: Wait 30 seconds for automatic refresh
554
+ - **Option 2**: Restart the application
555
+ - **Option 3**: Set `MCP_TOOL_CACHE_TTL=0` for indefinite caching (refresh only on restart)
556
+ - **Option 4**: Set a shorter TTL like `MCP_TOOL_CACHE_TTL=5` for 5-second refresh
557
+
558
+ ### Common Configuration Errors
559
+
560
+ ```json
561
+ // ❌ Missing required fields
562
+ {
563
+ "models": {
564
+ "my-model": {
565
+ "provider": "openai"
566
+ // Missing "model" field!
567
+ }
568
+ }
569
+ }
570
+
571
+ // ✅ Correct
572
+ {
573
+ "models": {
574
+ "my-model": {
575
+ "provider": "openai",
576
+ "model": "gpt-4.1"
577
+ }
578
+ }
579
+ }
580
+
581
+ // ❌ Invalid provider
582
+ {
583
+ "models": {
584
+ "my-model": {
585
+ "provider": "anthropic", // Not supported!
586
+ "model": "claude-3"
587
+ }
588
+ }
589
+ }
590
+
591
+ // ✅ Supported providers
592
+ {
593
+ "models": {
594
+ "openai-model": {
595
+ "provider": "openai",
596
+ "model": "gpt-4.1"
597
+ },
598
+ "ollama-model": {
599
+ "provider": "ollama",
600
+ "model": "qwen2.5:7b"
601
+ }
602
+ }
603
+ }
604
+ ```
605
+
606
+ ## 🚀 API Usage
607
+
608
+ ### Start the API Server
609
+
610
+ ```bash
611
+ casual-mcp serve --host 0.0.0.0 --port 8000
612
+ ```
613
+
614
+ ### Chat
615
+
616
+ #### Endpoint: `POST /chat`
617
+
618
+ #### Request Body:
619
+ - `model`: the LLM model to use
620
+ - `messages`: the list of chat messages (system, assistant, user, etc.) to send, allowing you to keep your own chat history in the client calling the API
621
+ - `include_stats`: (optional, default: `false`) include usage statistics in the response
622
+
623
+ #### Example:
624
+ ```json
625
+ {
626
+ "model": "gpt-4.1-nano",
627
+ "messages": [
628
+ {
629
+ "role": "user",
630
+ "content": "can you explain what the word consistent means?"
631
+ }
632
+ ],
633
+ "include_stats": true
634
+ }
635
+ ```
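+
+ For example, with the server running on the default port, the endpoint can be called with `curl`:
+
+ ```bash
+ curl -X POST http://localhost:8000/chat \
+   -H "Content-Type: application/json" \
+   -d '{
+     "model": "gpt-4.1-nano",
+     "messages": [{"role": "user", "content": "can you explain what the word consistent means?"}],
+     "include_stats": true
+   }'
+ ```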
636
+
637
+ #### Response with stats:
638
+ ```json
639
+ {
640
+ "messages": [...],
641
+ "response": "Consistent means...",
642
+ "stats": {
643
+ "tokens": {
644
+ "prompt_tokens": 150,
645
+ "completion_tokens": 75,
646
+ "total_tokens": 225
647
+ },
648
+ "tool_calls": {
649
+ "by_tool": {"words_define": 1},
650
+ "by_server": {"words": 1},
651
+ "total": 1
652
+ },
653
+ "llm_calls": 2
654
+ }
655
+ }
656
+ ```
657
+
658
+ ### Generate
659
+
660
+ The generate endpoint allows you to send a user prompt as a string.
661
+
662
+ It also supports sessions, which keep a record of all messages in the session and feed them back to the LLM for context. Sessions are stored in memory, so they are cleared when the server is restarted.
663
+
664
+ #### Endpoint: `POST /generate`
665
+
666
+ #### Request Body:
667
+ - `model`: the LLM model to use
668
+ - `prompt`: the user prompt
669
+ - `session_id`: an optional ID under which all messages from the session are stored and provided back to the LLM for context
670
+ - `include_stats`: (optional, default: `false`) include usage statistics in the response
671
+
672
+ #### Example:
673
+ ```json
674
+ {
675
+ "session_id": "my-session",
676
+ "model": "gpt-4o-mini",
677
+ "prompt": "can you explain what the word consistent means?",
678
+ "include_stats": true
679
+ }
680
+ ```
681
+
682
+ ### Get Session
683
+
684
+ Get all the messages from a session
685
+
686
+ #### Endpoint: `GET /generate/session/{session_id}`
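+
+ For example (assuming the default host and port):
+
+ ```bash
+ curl http://localhost:8000/generate/session/my-session
+ ```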
687
+
688
+
689
+ ## License
690
+
691
+ This software is released under the [MIT License](LICENSE)