async-media-agents 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- async_media_agents-0.1.0/LICENSE +21 -0
- async_media_agents-0.1.0/PKG-INFO +292 -0
- async_media_agents-0.1.0/README.md +184 -0
- async_media_agents-0.1.0/async_media_agents/__init__.py +7 -0
- async_media_agents-0.1.0/async_media_agents/agents/__init__.py +13 -0
- async_media_agents-0.1.0/async_media_agents/agents/audio_agent.py +78 -0
- async_media_agents-0.1.0/async_media_agents/agents/image_agent.py +79 -0
- async_media_agents-0.1.0/async_media_agents/agents/supervisor.py +150 -0
- async_media_agents-0.1.0/async_media_agents/agents/video_agent.py +79 -0
- async_media_agents-0.1.0/async_media_agents/graph/__init__.py +7 -0
- async_media_agents-0.1.0/async_media_agents/graph/workflow.py +145 -0
- async_media_agents-0.1.0/async_media_agents/state/shared_memory.py +52 -0
- async_media_agents-0.1.0/async_media_agents/state/state.py +38 -0
- async_media_agents-0.1.0/async_media_agents/tools/audio_tool.py +29 -0
- async_media_agents-0.1.0/async_media_agents/tools/image_tool.py +29 -0
- async_media_agents-0.1.0/async_media_agents/tools/mcp_client.py +60 -0
- async_media_agents-0.1.0/async_media_agents/tools/mcp_server.py +76 -0
- async_media_agents-0.1.0/async_media_agents/tools/processors/__init__.py +7 -0
- async_media_agents-0.1.0/async_media_agents/tools/processors/audio_processor.py +60 -0
- async_media_agents-0.1.0/async_media_agents/tools/processors/image_processor.py +55 -0
- async_media_agents-0.1.0/async_media_agents/tools/processors/video_processor.py +73 -0
- async_media_agents-0.1.0/async_media_agents/tools/video_tool.py +29 -0
- async_media_agents-0.1.0/async_media_agents/utils/requirements.py +53 -0
- async_media_agents-0.1.0/async_media_agents.egg-info/PKG-INFO +292 -0
- async_media_agents-0.1.0/async_media_agents.egg-info/SOURCES.txt +28 -0
- async_media_agents-0.1.0/async_media_agents.egg-info/dependency_links.txt +1 -0
- async_media_agents-0.1.0/async_media_agents.egg-info/requires.txt +98 -0
- async_media_agents-0.1.0/async_media_agents.egg-info/top_level.txt +1 -0
- async_media_agents-0.1.0/pyproject.toml +142 -0
- async_media_agents-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Abka Ferguson
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: async-media-agents
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A multi-agent workflow for processing audio, image, and video data.
|
|
5
|
+
Author-email: Abka012 <fergusonabka012@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: aiofile==3.11.1
|
|
11
|
+
Requires-Dist: annotated-types==0.7.0
|
|
12
|
+
Requires-Dist: anyio==4.13.0
|
|
13
|
+
Requires-Dist: attrs==26.1.0
|
|
14
|
+
Requires-Dist: Authlib==1.7.2
|
|
15
|
+
Requires-Dist: beartype==0.22.9
|
|
16
|
+
Requires-Dist: cachetools==7.1.3
|
|
17
|
+
Requires-Dist: caio==0.9.25
|
|
18
|
+
Requires-Dist: certifi==2026.5.20
|
|
19
|
+
Requires-Dist: cffi==2.0.0
|
|
20
|
+
Requires-Dist: charset-normalizer==3.4.7
|
|
21
|
+
Requires-Dist: click==8.4.0
|
|
22
|
+
Requires-Dist: cryptography==48.0.0
|
|
23
|
+
Requires-Dist: cyclopts==4.15.0
|
|
24
|
+
Requires-Dist: dnspython==2.8.0
|
|
25
|
+
Requires-Dist: docstring_parser==0.18.0
|
|
26
|
+
Requires-Dist: email-validator==2.3.0
|
|
27
|
+
Requires-Dist: exceptiongroup==1.3.1
|
|
28
|
+
Requires-Dist: fastmcp==3.3.1
|
|
29
|
+
Requires-Dist: fastmcp-slim==3.3.1
|
|
30
|
+
Requires-Dist: griffelib==2.0.2
|
|
31
|
+
Requires-Dist: h11==0.16.0
|
|
32
|
+
Requires-Dist: httpcore==1.0.9
|
|
33
|
+
Requires-Dist: httpx==0.28.1
|
|
34
|
+
Requires-Dist: httpx-sse==0.4.3
|
|
35
|
+
Requires-Dist: idna==3.15
|
|
36
|
+
Requires-Dist: jaraco.classes==3.4.0
|
|
37
|
+
Requires-Dist: jaraco.context==6.1.2
|
|
38
|
+
Requires-Dist: jaraco.functools==4.5.0
|
|
39
|
+
Requires-Dist: jeepney==0.9.0
|
|
40
|
+
Requires-Dist: joserfc==1.6.5
|
|
41
|
+
Requires-Dist: jsonpatch==1.33
|
|
42
|
+
Requires-Dist: jsonpointer==3.1.1
|
|
43
|
+
Requires-Dist: jsonref==1.1.0
|
|
44
|
+
Requires-Dist: jsonschema==4.26.0
|
|
45
|
+
Requires-Dist: jsonschema-path==0.5.0
|
|
46
|
+
Requires-Dist: jsonschema-specifications==2025.9.1
|
|
47
|
+
Requires-Dist: keyring==25.7.0
|
|
48
|
+
Requires-Dist: langchain==1.3.1
|
|
49
|
+
Requires-Dist: langchain-core==1.4.0
|
|
50
|
+
Requires-Dist: langchain-protocol==0.0.15
|
|
51
|
+
Requires-Dist: langgraph==1.2.1
|
|
52
|
+
Requires-Dist: langgraph-checkpoint==4.1.0
|
|
53
|
+
Requires-Dist: langgraph-prebuilt==1.1.0
|
|
54
|
+
Requires-Dist: langgraph-sdk==0.3.14
|
|
55
|
+
Requires-Dist: langsmith==0.8.5
|
|
56
|
+
Requires-Dist: markdown-it-py==4.2.0
|
|
57
|
+
Requires-Dist: mcp==1.27.1
|
|
58
|
+
Requires-Dist: mdurl==0.1.2
|
|
59
|
+
Requires-Dist: more-itertools==11.0.2
|
|
60
|
+
Requires-Dist: nodejs-wheel-binaries==24.15.0
|
|
61
|
+
Requires-Dist: numpy==2.4.6
|
|
62
|
+
Requires-Dist: openapi-pydantic==0.5.1
|
|
63
|
+
Requires-Dist: opencv-python-headless==4.13.0.92
|
|
64
|
+
Requires-Dist: opentelemetry-api==1.42.1
|
|
65
|
+
Requires-Dist: orjson==3.11.9
|
|
66
|
+
Requires-Dist: ormsgpack==1.12.2
|
|
67
|
+
Requires-Dist: packaging==26.2
|
|
68
|
+
Requires-Dist: pathable==0.6.0
|
|
69
|
+
Requires-Dist: pillow==12.2.0
|
|
70
|
+
Requires-Dist: platformdirs==4.9.6
|
|
71
|
+
Requires-Dist: py-key-value-aio==0.4.4
|
|
72
|
+
Requires-Dist: pycparser==3.0
|
|
73
|
+
Requires-Dist: pydantic==2.13.4
|
|
74
|
+
Requires-Dist: pydantic-settings==2.14.1
|
|
75
|
+
Requires-Dist: pydantic_core==2.46.4
|
|
76
|
+
Requires-Dist: Pygments==2.20.0
|
|
77
|
+
Requires-Dist: PyJWT==2.13.0
|
|
78
|
+
Requires-Dist: pyperclip==1.11.0
|
|
79
|
+
Requires-Dist: python-dotenv==1.2.2
|
|
80
|
+
Requires-Dist: python-multipart==0.0.29
|
|
81
|
+
Requires-Dist: PyYAML==6.0.3
|
|
82
|
+
Requires-Dist: referencing==0.37.0
|
|
83
|
+
Requires-Dist: requests==2.34.2
|
|
84
|
+
Requires-Dist: requests-toolbelt==1.0.0
|
|
85
|
+
Requires-Dist: rich==15.0.0
|
|
86
|
+
Requires-Dist: rich-rst==2.0.1
|
|
87
|
+
Requires-Dist: rpds-py==0.30.0
|
|
88
|
+
Requires-Dist: SecretStorage==3.5.0
|
|
89
|
+
Requires-Dist: sse-starlette==3.4.4
|
|
90
|
+
Requires-Dist: starlette==1.0.1
|
|
91
|
+
Requires-Dist: tenacity==9.1.4
|
|
92
|
+
Requires-Dist: typing-inspection==0.4.2
|
|
93
|
+
Requires-Dist: typing_extensions==4.15.0
|
|
94
|
+
Requires-Dist: uncalled-for==0.3.2
|
|
95
|
+
Requires-Dist: urllib3==2.7.0
|
|
96
|
+
Requires-Dist: uuid_utils==0.16.0
|
|
97
|
+
Requires-Dist: uvicorn==0.47.0
|
|
98
|
+
Requires-Dist: watchfiles==1.2.0
|
|
99
|
+
Requires-Dist: websockets==16.0
|
|
100
|
+
Requires-Dist: xxhash==3.7.0
|
|
101
|
+
Requires-Dist: zstandard==0.25.0
|
|
102
|
+
Provides-Extra: dev
|
|
103
|
+
Requires-Dist: pytest; extra == "dev"
|
|
104
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
105
|
+
Requires-Dist: flake8; extra == "dev"
|
|
106
|
+
Requires-Dist: basedpyright==1.39.5; extra == "dev"
|
|
107
|
+
Dynamic: license-file
|
|
108
|
+
|
|
109
|
+
# async-media-agents
|
|
110
|
+
|
|
111
|
+
Async Multi-Agent POC using LangGraph + FastMCP + real media processing.
|
|
112
|
+
|
|
113
|
+
## Overview
|
|
114
|
+
|
|
115
|
+
A lightweight async multi-agent system that processes images, audio, and video via base64-encoded payloads. Uses LangGraph for workflow orchestration, FastMCP for tool registration, and real processing libraries (Pillow, OpenCV, stdlib `wave`).
|
|
116
|
+
|
|
117
|
+
## Architecture
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
User Request
|
|
121
|
+
|
|
|
122
|
+
SupervisorAgent
|
|
123
|
+
|
|
|
124
|
+
Fan-out (parallel conditional edge)
|
|
125
|
+
┌──────────┼──────────┐
|
|
126
|
+
| | |
|
|
127
|
+
ImageAgent AudioAgent VideoAgent
|
|
128
|
+
| | |
|
|
129
|
+
Pillow stdlib wave OpenCV
|
|
130
|
+
processor processor processor
|
|
131
|
+
| | |
|
|
132
|
+
└──────────┼──────────┘
|
|
133
|
+
Fan-in to Aggregator
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Features
|
|
137
|
+
|
|
138
|
+
- ✅ **Real media processing** — Powered by Pillow (Images), OpenCV (Video), and stdlib `wave` (Audio).
|
|
139
|
+
- ✅ **Parallel execution** — High-performance fan-out/fan-in orchestration using LangGraph.
|
|
140
|
+
- ✅ **MCP Integration** — Model Context Protocol support via FastMCP for tool registration.
|
|
141
|
+
- ✅ **Robust error handling** — Graceful recovery from corrupt base64 or invalid formats.
|
|
142
|
+
- ✅ **Shared state** — Concurrent-safe state management via `Annotated` reducers.
|
|
143
|
+
- ✅ **Type Safety** — Comprehensive static analysis with `basedpyright`.
|
|
144
|
+
|
|
145
|
+
## Requirements
|
|
146
|
+
|
|
147
|
+
```text
|
|
148
|
+
langgraph
|
|
149
|
+
langchain
|
|
150
|
+
fastmcp
|
|
151
|
+
Pillow
|
|
152
|
+
opencv-python-headless
|
|
153
|
+
numpy
|
|
154
|
+
pytest
|
|
155
|
+
pytest-asyncio
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Setup
|
|
159
|
+
|
|
160
|
+
### 1. Create and activate a virtual environment
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
# Create the virtual environment
|
|
164
|
+
python -m venv .venv
|
|
165
|
+
|
|
166
|
+
# Activate it
|
|
167
|
+
source .venv/bin/activate # Linux / macOS
|
|
168
|
+
.venv\Scripts\activate # Windows (cmd)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### 2. Install dependencies
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
pip install -r requirements.txt
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Project Structure
|
|
178
|
+
|
|
179
|
+
```text
|
|
180
|
+
project/
|
|
181
|
+
├── async_media_agents/
|
|
182
|
+
│ ├── agents/
|
|
183
|
+
│ │ ├── image_agent.py # Specialized image logic
|
|
184
|
+
│ │ ├── audio_agent.py # Specialized audio logic
|
|
185
|
+
│ │ ├── video_agent.py # Specialized video logic
|
|
186
|
+
│ │ └── supervisor.py # Orchestration & routing
|
|
187
|
+
│ │
|
|
188
|
+
│ ├── tools/
|
|
189
|
+
│ │ ├── mcp_server.py # FastMCP server registration
|
|
190
|
+
│ │ ├── mcp_client.py # Async client interface
|
|
191
|
+
│ │ └── processors/ # Pure processing logic (decoupled)
|
|
192
|
+
│ │ ├── image_processor.py # Pillow-based
|
|
193
|
+
│ │ ├── audio_processor.py # stdlib wave
|
|
194
|
+
│ │ └── video_processor.py # OpenCV-based
|
|
195
|
+
│ │
|
|
196
|
+
│ ├── graph/
|
|
197
|
+
│ │ └── workflow.py # LangGraph workflow definition
|
|
198
|
+
│ │
|
|
199
|
+
│ ├── state/
|
|
200
|
+
│ │ ├── state.py # Shared AgentState with reducers
|
|
201
|
+
│ │ └── shared_memory.py # Global shared context utilities
|
|
202
|
+
│ │
|
|
203
|
+
│ └── __init__.py
|
|
204
|
+
│
|
|
205
|
+
├── scripts/
|
|
206
|
+
│ └── test_workflow.py # Integration & payload tests
|
|
207
|
+
│
|
|
208
|
+
├── main.py # CLI demo & examples
|
|
209
|
+
├── requirements.txt
|
|
210
|
+
├── pyproject.toml # Project & Tool configuration
|
|
211
|
+
└── README.md
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Usage
|
|
215
|
+
|
|
216
|
+
### Run the demo
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
python main.py
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
The demo showcases three core patterns:
|
|
223
|
+
1. **Text-based routing** — Auto-detects task type from user natural language.
|
|
224
|
+
2. **Parallel processing** — Simultaneously processes multiple media types via fan-out.
|
|
225
|
+
3. **Supervisor workflow** — Uses the Supervisor agent to route to a single target.
|
|
226
|
+
|
|
227
|
+
### Run tests
|
|
228
|
+
|
|
229
|
+
Tests can be executed directly or through `pytest`:
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
# Direct execution (includes payload generation logs)
|
|
233
|
+
python scripts/test_workflow.py
|
|
234
|
+
|
|
235
|
+
# Using pytest
|
|
236
|
+
pytest scripts/test_workflow.py
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
The test suite covers:
|
|
240
|
+
- Base64 payload integrity (PNG, WAV, MP4)
|
|
241
|
+
- Real metadata extraction via processors
|
|
242
|
+
- Parallel transport through the workflow
|
|
243
|
+
- Partial payload handling
|
|
244
|
+
- Robust error handling for corrupted data
|
|
245
|
+
|
|
246
|
+
### Programmatic use
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
from async_media_agents.graph.workflow import Workflow
|
|
250
|
+
|
|
251
|
+
wf = Workflow()
|
|
252
|
+
|
|
253
|
+
# Text-based — auto-routes to image/audio/video agent (the `await` calls below must run inside a coroutine, e.g. one started with `asyncio.run`)
|
|
254
|
+
result = await wf.run("Analyze this image")
|
|
255
|
+
|
|
256
|
+
# Parallel — feeds base64 to all three agents
|
|
257
|
+
result = await wf.run_parallel(
|
|
258
|
+
image_data="base64_encoded_png",
|
|
259
|
+
audio_data="base64_encoded_wav",
|
|
260
|
+
video_data="base64_encoded_mp4",
|
|
261
|
+
)
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
## Processing Pipeline
|
|
265
|
+
|
|
266
|
+
| Media | Library | Extracted Metadata |
|
|
267
|
+
|---------|-----------------------|-------------------------------------------------------|
|
|
268
|
+
| Image | Pillow | width, height, format, mode, thumbnail |
|
|
269
|
+
| Audio | stdlib `wave` | duration, sample rate, channels, sample width, frames |
|
|
270
|
+
| Video | opencv-python-headless | duration, fps, resolution, frame count, dimensions |
|
|
271
|
+
|
|
272
|
+
All processors return structured dicts: `{"status": "success", "metadata": {...}}` or `{"status": "error", "error": "..."}`.
|
|
273
|
+
|
|
274
|
+
## Static Analysis
|
|
275
|
+
|
|
276
|
+
The project uses `basedpyright` for type checking:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
pip install basedpyright
|
|
280
|
+
basedpyright .
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
Configuration lives in `pyproject.toml` under `[tool.pyright]`.
|
|
284
|
+
|
|
285
|
+
## State Management
|
|
286
|
+
|
|
287
|
+
`AgentState` extends `MessagesState` with custom fields. Fields written concurrently during parallel fan-out use `Annotated` reducers for safe merging:
|
|
288
|
+
|
|
289
|
+
- `results` — `_merge_results` dict merge
|
|
290
|
+
- `metadata` — `_merge_results` dict merge
|
|
291
|
+
- `current_agent` — `_last_wins`
|
|
292
|
+
- `error` — `_last_wins`
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# async-media-agents
|
|
2
|
+
|
|
3
|
+
Async Multi-Agent POC using LangGraph + FastMCP + real media processing.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
A lightweight async multi-agent system that processes images, audio, and video via base64-encoded payloads. Uses LangGraph for workflow orchestration, FastMCP for tool registration, and real processing libraries (Pillow, OpenCV, stdlib `wave`).
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
User Request
|
|
13
|
+
|
|
|
14
|
+
SupervisorAgent
|
|
15
|
+
|
|
|
16
|
+
Fan-out (parallel conditional edge)
|
|
17
|
+
┌──────────┼──────────┐
|
|
18
|
+
| | |
|
|
19
|
+
ImageAgent AudioAgent VideoAgent
|
|
20
|
+
| | |
|
|
21
|
+
Pillow stdlib wave OpenCV
|
|
22
|
+
processor processor processor
|
|
23
|
+
| | |
|
|
24
|
+
└──────────┼──────────┘
|
|
25
|
+
Fan-in to Aggregator
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Features
|
|
29
|
+
|
|
30
|
+
- ✅ **Real media processing** — Powered by Pillow (Images), OpenCV (Video), and stdlib `wave` (Audio).
|
|
31
|
+
- ✅ **Parallel execution** — High-performance fan-out/fan-in orchestration using LangGraph.
|
|
32
|
+
- ✅ **MCP Integration** — Model Context Protocol support via FastMCP for tool registration.
|
|
33
|
+
- ✅ **Robust error handling** — Graceful recovery from corrupt base64 or invalid formats.
|
|
34
|
+
- ✅ **Shared state** — Concurrent-safe state management via `Annotated` reducers.
|
|
35
|
+
- ✅ **Type Safety** — Comprehensive static analysis with `basedpyright`.
|
|
36
|
+
|
|
37
|
+
## Requirements
|
|
38
|
+
|
|
39
|
+
```text
|
|
40
|
+
langgraph
|
|
41
|
+
langchain
|
|
42
|
+
fastmcp
|
|
43
|
+
Pillow
|
|
44
|
+
opencv-python-headless
|
|
45
|
+
numpy
|
|
46
|
+
pytest
|
|
47
|
+
pytest-asyncio
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Setup
|
|
51
|
+
|
|
52
|
+
### 1. Create and activate a virtual environment
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Create the virtual environment
|
|
56
|
+
python -m venv .venv
|
|
57
|
+
|
|
58
|
+
# Activate it
|
|
59
|
+
source .venv/bin/activate # Linux / macOS
|
|
60
|
+
.venv\Scripts\activate # Windows (cmd)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### 2. Install dependencies
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install -r requirements.txt
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Project Structure
|
|
70
|
+
|
|
71
|
+
```text
|
|
72
|
+
project/
|
|
73
|
+
├── async_media_agents/
|
|
74
|
+
│ ├── agents/
|
|
75
|
+
│ │ ├── image_agent.py # Specialized image logic
|
|
76
|
+
│ │ ├── audio_agent.py # Specialized audio logic
|
|
77
|
+
│ │ ├── video_agent.py # Specialized video logic
|
|
78
|
+
│ │ └── supervisor.py # Orchestration & routing
|
|
79
|
+
│ │
|
|
80
|
+
│ ├── tools/
|
|
81
|
+
│ │ ├── mcp_server.py # FastMCP server registration
|
|
82
|
+
│ │ ├── mcp_client.py # Async client interface
|
|
83
|
+
│ │ └── processors/ # Pure processing logic (decoupled)
|
|
84
|
+
│ │ ├── image_processor.py # Pillow-based
|
|
85
|
+
│ │ ├── audio_processor.py # stdlib wave
|
|
86
|
+
│ │ └── video_processor.py # OpenCV-based
|
|
87
|
+
│ │
|
|
88
|
+
│ ├── graph/
|
|
89
|
+
│ │ └── workflow.py # LangGraph workflow definition
|
|
90
|
+
│ │
|
|
91
|
+
│ ├── state/
|
|
92
|
+
│ │ ├── state.py # Shared AgentState with reducers
|
|
93
|
+
│ │ └── shared_memory.py # Global shared context utilities
|
|
94
|
+
│ │
|
|
95
|
+
│ └── __init__.py
|
|
96
|
+
│
|
|
97
|
+
├── scripts/
|
|
98
|
+
│ └── test_workflow.py # Integration & payload tests
|
|
99
|
+
│
|
|
100
|
+
├── main.py # CLI demo & examples
|
|
101
|
+
├── requirements.txt
|
|
102
|
+
├── pyproject.toml # Project & Tool configuration
|
|
103
|
+
└── README.md
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Usage
|
|
107
|
+
|
|
108
|
+
### Run the demo
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
python main.py
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
The demo showcases three core patterns:
|
|
115
|
+
1. **Text-based routing** — Auto-detects task type from user natural language.
|
|
116
|
+
2. **Parallel processing** — Simultaneously processes multiple media types via fan-out.
|
|
117
|
+
3. **Supervisor workflow** — Uses the Supervisor agent to route to a single target.
|
|
118
|
+
|
|
119
|
+
### Run tests
|
|
120
|
+
|
|
121
|
+
Tests can be executed directly or through `pytest`:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# Direct execution (includes payload generation logs)
|
|
125
|
+
python scripts/test_workflow.py
|
|
126
|
+
|
|
127
|
+
# Using pytest
|
|
128
|
+
pytest scripts/test_workflow.py
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
The test suite covers:
|
|
132
|
+
- Base64 payload integrity (PNG, WAV, MP4)
|
|
133
|
+
- Real metadata extraction via processors
|
|
134
|
+
- Parallel transport through the workflow
|
|
135
|
+
- Partial payload handling
|
|
136
|
+
- Robust error handling for corrupted data
|
|
137
|
+
|
|
138
|
+
### Programmatic use
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from async_media_agents.graph.workflow import Workflow
|
|
142
|
+
|
|
143
|
+
wf = Workflow()
|
|
144
|
+
|
|
145
|
+
# Text-based — auto-routes to image/audio/video agent (the `await` calls below must run inside a coroutine, e.g. one started with `asyncio.run`)
|
|
146
|
+
result = await wf.run("Analyze this image")
|
|
147
|
+
|
|
148
|
+
# Parallel — feeds base64 to all three agents
|
|
149
|
+
result = await wf.run_parallel(
|
|
150
|
+
image_data="base64_encoded_png",
|
|
151
|
+
audio_data="base64_encoded_wav",
|
|
152
|
+
video_data="base64_encoded_mp4",
|
|
153
|
+
)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Processing Pipeline
|
|
157
|
+
|
|
158
|
+
| Media | Library | Extracted Metadata |
|
|
159
|
+
|---------|-----------------------|-------------------------------------------------------|
|
|
160
|
+
| Image | Pillow | width, height, format, mode, thumbnail |
|
|
161
|
+
| Audio | stdlib `wave` | duration, sample rate, channels, sample width, frames |
|
|
162
|
+
| Video | opencv-python-headless | duration, fps, resolution, frame count, dimensions |
|
|
163
|
+
|
|
164
|
+
All processors return structured dicts: `{"status": "success", "metadata": {...}}` or `{"status": "error", "error": "..."}`.
|
|
165
|
+
|
|
166
|
+
## Static Analysis
|
|
167
|
+
|
|
168
|
+
The project uses `basedpyright` for type checking:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
pip install basedpyright
|
|
172
|
+
basedpyright .
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Configuration lives in `pyproject.toml` under `[tool.pyright]`.
|
|
176
|
+
|
|
177
|
+
## State Management
|
|
178
|
+
|
|
179
|
+
`AgentState` extends `MessagesState` with custom fields. Fields written concurrently during parallel fan-out use `Annotated` reducers for safe merging:
|
|
180
|
+
|
|
181
|
+
- `results` — `_merge_results` dict merge
|
|
182
|
+
- `metadata` — `_merge_results` dict merge
|
|
183
|
+
- `current_agent` — `_last_wins`
|
|
184
|
+
- `error` — `_last_wins`
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Async Multi-Agent System Package
|
|
2
|
+
"""Multi-agent orchestration system using LangGraph and FastMCP."""
|
|
3
|
+
|
|
4
|
+
from async_media_agents.state.state import AgentState
|
|
5
|
+
from async_media_agents.utils.requirements import update_requirements
|
|
6
|
+
|
|
7
|
+
__all__ = ["AgentState", "update_requirements"]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Multi-agent system package."""
|
|
2
|
+
|
|
3
|
+
from async_media_agents.agents.audio_agent import AudioAgent
|
|
4
|
+
from async_media_agents.agents.image_agent import ImageAgent
|
|
5
|
+
from async_media_agents.agents.supervisor import SupervisorAgent
|
|
6
|
+
from async_media_agents.agents.video_agent import VideoAgent
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"ImageAgent",
|
|
10
|
+
"AudioAgent",
|
|
11
|
+
"VideoAgent",
|
|
12
|
+
"SupervisorAgent",
|
|
13
|
+
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Audio processing agent."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
6
|
+
|
|
7
|
+
from async_media_agents.state.state import AgentState
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AudioAgent:
    """Agent specialized in audio processing.

    All processing is delegated to the ``mcp_client`` object imported from
    ``async_media_agents.tools.mcp_client``; this class only shapes the
    inputs and outputs around that call.
    """

    # Text used as the "system" message of ``self.prompt``.
    system_prompt: str = """You are an audio processing agent.

Your role is to analyze and process audio based on user requests.
You can:
- Extract audio metadata (duration, sample rate, etc.)
- Detect audio content (speech, music, noise)
- Identify audio characteristics (volume, pitch)
- Process audio streams

Always use the available tools for processing audio.
"""

    def __init__(self) -> None:
        """Set the agent name and build its system/user chat prompt."""
        self.name: str = "audio_agent"
        self.prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("user", "{input}"),
            ]
        )

    async def process_audio(self, audio_data: str, format: str = "wav") -> str:
        """Send audio to the MCP client and return its result.

        Args:
            audio_data: Audio payload forwarded unchanged to the client
                (presumably base64-encoded; verify against the caller).
            format: Format label forwarded unchanged to the client.

        Returns:
            ``"Error: <message>"`` when the client reports
            ``status == "error"``; otherwise the client's ``"result"``
            entry, or ``"No result returned"`` when that key is absent.
        """
        # Function-scope import kept from the original implementation.
        from async_media_agents.tools.mcp_client import mcp_client

        result = await mcp_client.process_audio(audio_data, format)
        if result.get("status") == "error":
            return f"Error: {result.get('error', 'unknown error')}"
        return result.get("result", "No result returned")

    async def extract_info(self, audio_data: str) -> dict:
        """Return the MCP client's raw response for ``audio_data`` as "wav"."""
        from async_media_agents.tools.mcp_client import mcp_client

        return await mcp_client.process_audio(audio_data, "wav")

    async def run(self, state: AgentState) -> dict[str, Any]:
        """Run the audio agent against the workflow state.

        Args:
            state: Workflow state; only ``state["task_data"]["audio_data"]``
                is read.

        Returns:
            A state update with ``current_agent`` and a
            ``results["audio_agent"]`` entry. ``processing_result`` is added
            only when a non-empty audio payload was supplied.
        """
        # ``or {}`` also covers a ``task_data`` key that is present but None,
        # which previously raised AttributeError on the ``.get`` below.
        task_data = state.get("task_data") or {}

        audio_data = task_data.get("audio_data", "")

        result: dict[str, Any] = {
            "current_agent": self.name,
            "results": {
                "audio_agent": {
                    "status": "completed",
                    "task_type": "audio",
                },
            },
        }

        if audio_data:
            processed = await self.process_audio(audio_data, "wav")
            result["results"]["audio_agent"]["processing_result"] = processed

        return result

    async def process(self, audio_data: str) -> dict[str, Any]:
        """Process audio directly, outside the workflow graph.

        Returns:
            A dict with ``status``, ``agent`` and ``result`` keys.
        """
        result = await self.process_audio(audio_data, "wav")
        return {
            # Status is inferred from the "Error" prefix that
            # ``process_audio`` puts on failures.
            "status": "success" if not result.startswith("Error") else "error",
            "agent": self.name,
            "result": result,
        }
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Image processing agent."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from langchain_core.prompts import ChatPromptTemplate
|
|
6
|
+
|
|
7
|
+
from async_media_agents.state.state import AgentState
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ImageAgent:
    """Agent specialized in image processing.

    All processing is delegated to the ``mcp_client`` object imported from
    ``async_media_agents.tools.mcp_client``; this class only shapes the
    inputs and outputs around that call.
    """

    # Text used as the "system" message of ``self.prompt``.
    system_prompt: str = """You are an image processing agent.

Your role is to analyze and process images based on user requests.
You can:
- Extract image metadata
- Identify objects, scenes, or people in images
- Detect image properties (size, format, colors)
- Apply filters or transformations

Always use the available tools for processing images.
"""

    def __init__(self) -> None:
        """Set the agent name and build its system/user chat prompt."""
        self.name: str = "image_agent"
        self.prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("user", "{input}"),
            ]
        )

    async def process_image(self, image_data: str, format: str = "png") -> str:
        """Send an image to the MCP client and return its result.

        Args:
            image_data: Image payload forwarded unchanged to the client
                (presumably base64-encoded; verify against the caller).
            format: Format label forwarded unchanged to the client.

        Returns:
            ``"Error: <message>"`` when the client reports
            ``status == "error"``; otherwise the client's ``"result"``
            entry, or ``"No result returned"`` when that key is absent.
        """
        # Function-scope import kept from the original implementation.
        from async_media_agents.tools.mcp_client import mcp_client

        result = await mcp_client.process_image(image_data, format)
        if result.get("status") == "error":
            return f"Error: {result.get('error', 'unknown error')}"
        return result.get("result", "No result returned")

    async def extract_info(self, image_data: str) -> dict:
        """Return the MCP client's raw response for ``image_data`` as "png"."""
        from async_media_agents.tools.mcp_client import mcp_client

        return await mcp_client.process_image(image_data, "png")

    async def run(self, state: AgentState) -> dict[str, Any]:
        """Run the image agent against the workflow state.

        Args:
            state: Workflow state; only ``state["task_data"]["image_data"]``
                is read.

        Returns:
            A state update with ``current_agent`` and a
            ``results["image_agent"]`` entry. ``processing_result`` is added
            only when a non-empty image payload was supplied.
        """
        # ``or {}`` also covers a ``task_data`` key that is present but None,
        # which previously raised AttributeError on the ``.get`` below.
        task_data = state.get("task_data") or {}

        # Get image data from task
        image_data = task_data.get("image_data", "")

        result: dict[str, Any] = {
            "current_agent": self.name,
            "results": {
                "image_agent": {
                    "status": "completed",
                    "task_type": "image",
                },
            },
        }

        if image_data:
            processed = await self.process_image(image_data, "png")
            result["results"]["image_agent"]["processing_result"] = processed

        return result

    async def process(self, image_data: str) -> dict[str, Any]:
        """Process an image directly, outside the workflow graph.

        Returns:
            A dict with ``status``, ``agent`` and ``result`` keys.
        """
        result = await self.process_image(image_data, "png")
        return {
            # Status is inferred from the "Error" prefix that
            # ``process_image`` puts on failures.
            "status": "success" if not result.startswith("Error") else "error",
            "agent": self.name,
            "result": result,
        }
|