mimo-multimodal-mcp 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mimo_multimodal_mcp-0.3.0/.env.example +6 -0
- mimo_multimodal_mcp-0.3.0/.github/workflows/publish.yml +24 -0
- mimo_multimodal_mcp-0.3.0/.gitignore +33 -0
- mimo_multimodal_mcp-0.3.0/PKG-INFO +172 -0
- mimo_multimodal_mcp-0.3.0/README.md +162 -0
- mimo_multimodal_mcp-0.3.0/pyproject.toml +21 -0
- mimo_multimodal_mcp-0.3.0/src/mimo_multimodal_mcp/__init__.py +5 -0
- mimo_multimodal_mcp-0.3.0/src/mimo_multimodal_mcp/server.py +354 -0
- mimo_multimodal_mcp-0.3.0/test_server.py +89 -0
- mimo_multimodal_mcp-0.3.0/uv.lock +1124 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [created]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
environment: pypi
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
- name: Install build tools
|
|
20
|
+
run: pip install build
|
|
21
|
+
- name: Build package
|
|
22
|
+
run: python -m build
|
|
23
|
+
- name: Publish to PyPI
|
|
24
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
.venv/
|
|
8
|
+
venv/
|
|
9
|
+
ENV/
|
|
10
|
+
|
|
11
|
+
# Distribution
|
|
12
|
+
dist/
|
|
13
|
+
build/
|
|
14
|
+
*.egg-info/
|
|
15
|
+
*.egg
|
|
16
|
+
|
|
17
|
+
# Environment
|
|
18
|
+
.env
|
|
19
|
+
.env.local
|
|
20
|
+
.venv/key
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.idea/
|
|
24
|
+
.vscode/
|
|
25
|
+
*.swp
|
|
26
|
+
*.swo
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
31
|
+
|
|
32
|
+
# Lock files (optional, uncomment if needed)
|
|
33
|
+
# uv.lock
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mimo-multimodal-mcp
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: MCP server for Xiaomi MiMo multimodal understanding (image, audio, video)
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: mcp[cli]>=1.2.0
|
|
7
|
+
Requires-Dist: openai>=1.0.0
|
|
8
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# MiMo Multimodal Understanding MCP Server
|
|
12
|
+
|
|
13
|
+
MCP server for Xiaomi MiMo multimodal understanding API (image, audio, video).
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
- **Image Understanding**: Single/multiple images, URL and local file support
|
|
18
|
+
- **Audio Understanding**: Single/multiple audio files, URL and local file support
|
|
19
|
+
- **Video Understanding**: Single/multiple videos, URL and local file support, configurable fps and resolution
|
|
20
|
+
|
|
21
|
+
## Setup
|
|
22
|
+
|
|
23
|
+
### 1. Install dependencies
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
uv sync
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### 2. Configure API Key
|
|
30
|
+
|
|
31
|
+
Copy `.env.example` to `.env` and fill in your API key:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
cp .env.example .env
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Or set the environment variable directly:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
export MIMO_API_KEY=your_api_key_here
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Get your API key from: https://platform.xiaomimimo.com
|
|
44
|
+
|
|
45
|
+
### 3. (Optional) Configure API Base URL
|
|
46
|
+
|
|
47
|
+
By default, the server uses `https://api.xiaomimimo.com/v1`. To use a different API endpoint:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
export MIMO_API_BASE=https://your-custom-endpoint/v1
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Or add it to your `.env` file:
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
MIMO_API_BASE=https://your-custom-endpoint/v1
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Usage
|
|
60
|
+
|
|
61
|
+
### Development mode (with MCP Inspector)
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
uv run mcp dev src/mimo_multimodal_mcp/server.py
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Install to Claude Desktop
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
uv run mcp install src/mimo_multimodal_mcp/server.py
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Direct execution
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
uv run python src/mimo_multimodal_mcp/server.py
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Tools
|
|
80
|
+
|
|
81
|
+
### `understand_image`
|
|
82
|
+
|
|
83
|
+
Analyze images using Xiaomi MiMo multimodal model.
|
|
84
|
+
|
|
85
|
+
| Parameter | Type | Required | Description |
|
|
86
|
+
|-----------|------|----------|-------------|
|
|
87
|
+
| `prompt` | string | Yes | Image understanding task description |
|
|
88
|
+
| `image_url` | string | No | Single image URL or base64-encoded `data:image/...` URI |
|
|
89
|
+
| `image_path` | string | No | Single local image file path |
|
|
90
|
+
| `image_urls` | list[string] | No | Multiple image URLs |
|
|
91
|
+
| `image_paths` | list[string] | No | Multiple local image file paths |
|
|
92
|
+
| `system_prompt` | string | No | Custom system prompt |
|
|
93
|
+
| `max_tokens` | integer | No | Max output length (default: 32768) |
|
|
94
|
+
|
|
95
|
+
**Supported formats**: JPEG, PNG, GIF, WebP
|
|
96
|
+
**Size limit**: 10MB
|
|
97
|
+
|
|
98
|
+
### `understand_audio`
|
|
99
|
+
|
|
100
|
+
Analyze audio using Xiaomi MiMo multimodal model.
|
|
101
|
+
|
|
102
|
+
| Parameter | Type | Required | Description |
|
|
103
|
+
|-----------|------|----------|-------------|
|
|
104
|
+
| `prompt` | string | Yes | Audio understanding task description |
|
|
105
|
+
| `audio_url` | string | No | Single audio URL |
|
|
106
|
+
| `audio_path` | string | No | Single local audio file path |
|
|
107
|
+
| `audio_urls` | list[string] | No | Multiple audio URLs |
|
|
108
|
+
| `audio_paths` | list[string] | No | Multiple local audio file paths |
|
|
109
|
+
| `system_prompt` | string | No | Custom system prompt |
|
|
110
|
+
| `max_tokens` | integer | No | Max output length (default: 32768) |
|
|
111
|
+
|
|
112
|
+
**Supported formats**: MP3, WAV, FLAC, M4A, OGG
|
|
113
|
+
**Size limit**: URL 100MB, Base64 50MB
|
|
114
|
+
|
|
115
|
+
### `understand_video`
|
|
116
|
+
|
|
117
|
+
Analyze video using Xiaomi MiMo multimodal model.
|
|
118
|
+
|
|
119
|
+
| Parameter | Type | Required | Description |
|
|
120
|
+
|-----------|------|----------|-------------|
|
|
121
|
+
| `prompt` | string | Yes | Video understanding task description |
|
|
122
|
+
| `video_url` | string | No | Single video URL |
|
|
123
|
+
| `video_path` | string | No | Single local video file path |
|
|
124
|
+
| `video_urls` | list[string] | No | Multiple video URLs |
|
|
125
|
+
| `video_paths` | list[string] | No | Multiple local video file paths |
|
|
126
|
+
| `fps` | float | No | Frames per second, range [0.1, 10], default: 2 |
|
|
127
|
+
| `media_resolution` | string | No | Resolution: "default" or "max" |
|
|
128
|
+
| `system_prompt` | string | No | Custom system prompt |
|
|
129
|
+
| `max_tokens` | integer | No | Max output length (default: 32768) |
|
|
130
|
+
|
|
131
|
+
**Supported formats**: MP4, MOV, AVI, WMV
|
|
132
|
+
**Size limit**: URL 300MB, Base64 50MB
|
|
133
|
+
|
|
134
|
+
## Examples
|
|
135
|
+
|
|
136
|
+
### Image Understanding
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
# URL
|
|
140
|
+
await understand_image(prompt="Describe this image", image_url="https://example.com/image.jpg")
|
|
141
|
+
|
|
142
|
+
# Local file
|
|
143
|
+
await understand_image(prompt="What text is in this?", image_path="/path/to/screenshot.png")
|
|
144
|
+
|
|
145
|
+
# Multiple images
|
|
146
|
+
await understand_image(prompt="Compare these", image_urls=["url1", "url2"])
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Audio Understanding
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
# URL
|
|
153
|
+
await understand_audio(prompt="Transcribe this audio", audio_url="https://example.com/audio.wav")
|
|
154
|
+
|
|
155
|
+
# Local file
|
|
156
|
+
await understand_audio(prompt="What is being said?", audio_path="/path/to/audio.mp3")
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Video Understanding
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# URL with default settings
|
|
163
|
+
await understand_video(prompt="Describe this video", video_url="https://example.com/video.mp4")
|
|
164
|
+
|
|
165
|
+
# URL with custom fps and resolution
|
|
166
|
+
await understand_video(
|
|
167
|
+
prompt="Describe the action",
|
|
168
|
+
video_url="https://example.com/video.mp4",
|
|
169
|
+
fps=5.0,
|
|
170
|
+
media_resolution="max"
|
|
171
|
+
)
|
|
172
|
+
```
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# MiMo Multimodal Understanding MCP Server
|
|
2
|
+
|
|
3
|
+
MCP server for Xiaomi MiMo multimodal understanding API (image, audio, video).
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Image Understanding**: Single/multiple images, URL and local file support
|
|
8
|
+
- **Audio Understanding**: Single/multiple audio files, URL and local file support
|
|
9
|
+
- **Video Understanding**: Single/multiple videos, URL and local file support, configurable fps and resolution
|
|
10
|
+
|
|
11
|
+
## Setup
|
|
12
|
+
|
|
13
|
+
### 1. Install dependencies
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv sync
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
### 2. Configure API Key
|
|
20
|
+
|
|
21
|
+
Copy `.env.example` to `.env` and fill in your API key:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
cp .env.example .env
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Or set the environment variable directly:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
export MIMO_API_KEY=your_api_key_here
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Get your API key from: https://platform.xiaomimimo.com
|
|
34
|
+
|
|
35
|
+
### 3. (Optional) Configure API Base URL
|
|
36
|
+
|
|
37
|
+
By default, the server uses `https://api.xiaomimimo.com/v1`. To use a different API endpoint:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
export MIMO_API_BASE=https://your-custom-endpoint/v1
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Or add it to your `.env` file:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
MIMO_API_BASE=https://your-custom-endpoint/v1
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
### Development mode (with MCP Inspector)
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
uv run mcp dev src/mimo_multimodal_mcp/server.py
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Install to Claude Desktop
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
uv run mcp install src/mimo_multimodal_mcp/server.py
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Direct execution
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
uv run python src/mimo_multimodal_mcp/server.py
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Tools
|
|
70
|
+
|
|
71
|
+
### `understand_image`
|
|
72
|
+
|
|
73
|
+
Analyze images using Xiaomi MiMo multimodal model.
|
|
74
|
+
|
|
75
|
+
| Parameter | Type | Required | Description |
|
|
76
|
+
|-----------|------|----------|-------------|
|
|
77
|
+
| `prompt` | string | Yes | Image understanding task description |
|
|
78
|
+
| `image_url` | string | No | Single image URL or base64-encoded `data:image/...` URI |
|
|
79
|
+
| `image_path` | string | No | Single local image file path |
|
|
80
|
+
| `image_urls` | list[string] | No | Multiple image URLs |
|
|
81
|
+
| `image_paths` | list[string] | No | Multiple local image file paths |
|
|
82
|
+
| `system_prompt` | string | No | Custom system prompt |
|
|
83
|
+
| `max_tokens` | integer | No | Max output length (default: 32768) |
|
|
84
|
+
|
|
85
|
+
**Supported formats**: JPEG, PNG, GIF, WebP
|
|
86
|
+
**Size limit**: 10MB
|
|
87
|
+
|
|
88
|
+
### `understand_audio`
|
|
89
|
+
|
|
90
|
+
Analyze audio using Xiaomi MiMo multimodal model.
|
|
91
|
+
|
|
92
|
+
| Parameter | Type | Required | Description |
|
|
93
|
+
|-----------|------|----------|-------------|
|
|
94
|
+
| `prompt` | string | Yes | Audio understanding task description |
|
|
95
|
+
| `audio_url` | string | No | Single audio URL |
|
|
96
|
+
| `audio_path` | string | No | Single local audio file path |
|
|
97
|
+
| `audio_urls` | list[string] | No | Multiple audio URLs |
|
|
98
|
+
| `audio_paths` | list[string] | No | Multiple local audio file paths |
|
|
99
|
+
| `system_prompt` | string | No | Custom system prompt |
|
|
100
|
+
| `max_tokens` | integer | No | Max output length (default: 32768) |
|
|
101
|
+
|
|
102
|
+
**Supported formats**: MP3, WAV, FLAC, M4A, OGG
|
|
103
|
+
**Size limit**: URL 100MB, Base64 50MB
|
|
104
|
+
|
|
105
|
+
### `understand_video`
|
|
106
|
+
|
|
107
|
+
Analyze video using Xiaomi MiMo multimodal model.
|
|
108
|
+
|
|
109
|
+
| Parameter | Type | Required | Description |
|
|
110
|
+
|-----------|------|----------|-------------|
|
|
111
|
+
| `prompt` | string | Yes | Video understanding task description |
|
|
112
|
+
| `video_url` | string | No | Single video URL |
|
|
113
|
+
| `video_path` | string | No | Single local video file path |
|
|
114
|
+
| `video_urls` | list[string] | No | Multiple video URLs |
|
|
115
|
+
| `video_paths` | list[string] | No | Multiple local video file paths |
|
|
116
|
+
| `fps` | float | No | Frames per second, range [0.1, 10], default: 2 |
|
|
117
|
+
| `media_resolution` | string | No | Resolution: "default" or "max" |
|
|
118
|
+
| `system_prompt` | string | No | Custom system prompt |
|
|
119
|
+
| `max_tokens` | integer | No | Max output length (default: 32768) |
|
|
120
|
+
|
|
121
|
+
**Supported formats**: MP4, MOV, AVI, WMV
|
|
122
|
+
**Size limit**: URL 300MB, Base64 50MB
|
|
123
|
+
|
|
124
|
+
## Examples
|
|
125
|
+
|
|
126
|
+
### Image Understanding
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
# URL
|
|
130
|
+
await understand_image(prompt="Describe this image", image_url="https://example.com/image.jpg")
|
|
131
|
+
|
|
132
|
+
# Local file
|
|
133
|
+
await understand_image(prompt="What text is in this?", image_path="/path/to/screenshot.png")
|
|
134
|
+
|
|
135
|
+
# Multiple images
|
|
136
|
+
await understand_image(prompt="Compare these", image_urls=["url1", "url2"])
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Audio Understanding
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
# URL
|
|
143
|
+
await understand_audio(prompt="Transcribe this audio", audio_url="https://example.com/audio.wav")
|
|
144
|
+
|
|
145
|
+
# Local file
|
|
146
|
+
await understand_audio(prompt="What is being said?", audio_path="/path/to/audio.mp3")
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Video Understanding
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
# URL with default settings
|
|
153
|
+
await understand_video(prompt="Describe this video", video_url="https://example.com/video.mp4")
|
|
154
|
+
|
|
155
|
+
# URL with custom fps and resolution
|
|
156
|
+
await understand_video(
|
|
157
|
+
prompt="Describe the action",
|
|
158
|
+
video_url="https://example.com/video.mp4",
|
|
159
|
+
fps=5.0,
|
|
160
|
+
media_resolution="max"
|
|
161
|
+
)
|
|
162
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mimo-multimodal-mcp"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "MCP server for Xiaomi MiMo multimodal understanding (image, audio, video)"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"mcp[cli]>=1.2.0",
|
|
9
|
+
"openai>=1.0.0",
|
|
10
|
+
"python-dotenv>=1.0.0",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
[project.scripts]
|
|
14
|
+
mimo-multimodal-mcp = "mimo_multimodal_mcp.server:main"
|
|
15
|
+
|
|
16
|
+
[build-system]
|
|
17
|
+
requires = ["hatchling"]
|
|
18
|
+
build-backend = "hatchling.build"
|
|
19
|
+
|
|
20
|
+
[tool.hatch.build.targets.wheel]
|
|
21
|
+
packages = ["src/mimo_multimodal_mcp"]
|