async-media-agents 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. async_media_agents-0.1.0/LICENSE +21 -0
  2. async_media_agents-0.1.0/PKG-INFO +292 -0
  3. async_media_agents-0.1.0/README.md +184 -0
  4. async_media_agents-0.1.0/async_media_agents/__init__.py +7 -0
  5. async_media_agents-0.1.0/async_media_agents/agents/__init__.py +13 -0
  6. async_media_agents-0.1.0/async_media_agents/agents/audio_agent.py +78 -0
  7. async_media_agents-0.1.0/async_media_agents/agents/image_agent.py +79 -0
  8. async_media_agents-0.1.0/async_media_agents/agents/supervisor.py +150 -0
  9. async_media_agents-0.1.0/async_media_agents/agents/video_agent.py +79 -0
  10. async_media_agents-0.1.0/async_media_agents/graph/__init__.py +7 -0
  11. async_media_agents-0.1.0/async_media_agents/graph/workflow.py +145 -0
  12. async_media_agents-0.1.0/async_media_agents/state/shared_memory.py +52 -0
  13. async_media_agents-0.1.0/async_media_agents/state/state.py +38 -0
  14. async_media_agents-0.1.0/async_media_agents/tools/audio_tool.py +29 -0
  15. async_media_agents-0.1.0/async_media_agents/tools/image_tool.py +29 -0
  16. async_media_agents-0.1.0/async_media_agents/tools/mcp_client.py +60 -0
  17. async_media_agents-0.1.0/async_media_agents/tools/mcp_server.py +76 -0
  18. async_media_agents-0.1.0/async_media_agents/tools/processors/__init__.py +7 -0
  19. async_media_agents-0.1.0/async_media_agents/tools/processors/audio_processor.py +60 -0
  20. async_media_agents-0.1.0/async_media_agents/tools/processors/image_processor.py +55 -0
  21. async_media_agents-0.1.0/async_media_agents/tools/processors/video_processor.py +73 -0
  22. async_media_agents-0.1.0/async_media_agents/tools/video_tool.py +29 -0
  23. async_media_agents-0.1.0/async_media_agents/utils/requirements.py +53 -0
  24. async_media_agents-0.1.0/async_media_agents.egg-info/PKG-INFO +292 -0
  25. async_media_agents-0.1.0/async_media_agents.egg-info/SOURCES.txt +28 -0
  26. async_media_agents-0.1.0/async_media_agents.egg-info/dependency_links.txt +1 -0
  27. async_media_agents-0.1.0/async_media_agents.egg-info/requires.txt +98 -0
  28. async_media_agents-0.1.0/async_media_agents.egg-info/top_level.txt +1 -0
  29. async_media_agents-0.1.0/pyproject.toml +142 -0
  30. async_media_agents-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Abka Ferguson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,292 @@
1
+ Metadata-Version: 2.4
2
+ Name: async-media-agents
3
+ Version: 0.1.0
4
+ Summary: A multi-agent workflow for processing audio, image, and video data.
5
+ Author-email: Abka012 <fergusonabka012@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: aiofile==3.11.1
11
+ Requires-Dist: annotated-types==0.7.0
12
+ Requires-Dist: anyio==4.13.0
13
+ Requires-Dist: attrs==26.1.0
14
+ Requires-Dist: Authlib==1.7.2
15
+ Requires-Dist: beartype==0.22.9
16
+ Requires-Dist: cachetools==7.1.3
17
+ Requires-Dist: caio==0.9.25
18
+ Requires-Dist: certifi==2026.5.20
19
+ Requires-Dist: cffi==2.0.0
20
+ Requires-Dist: charset-normalizer==3.4.7
21
+ Requires-Dist: click==8.4.0
22
+ Requires-Dist: cryptography==48.0.0
23
+ Requires-Dist: cyclopts==4.15.0
24
+ Requires-Dist: dnspython==2.8.0
25
+ Requires-Dist: docstring_parser==0.18.0
26
+ Requires-Dist: email-validator==2.3.0
27
+ Requires-Dist: exceptiongroup==1.3.1
28
+ Requires-Dist: fastmcp==3.3.1
29
+ Requires-Dist: fastmcp-slim==3.3.1
30
+ Requires-Dist: griffelib==2.0.2
31
+ Requires-Dist: h11==0.16.0
32
+ Requires-Dist: httpcore==1.0.9
33
+ Requires-Dist: httpx==0.28.1
34
+ Requires-Dist: httpx-sse==0.4.3
35
+ Requires-Dist: idna==3.15
36
+ Requires-Dist: jaraco.classes==3.4.0
37
+ Requires-Dist: jaraco.context==6.1.2
38
+ Requires-Dist: jaraco.functools==4.5.0
39
+ Requires-Dist: jeepney==0.9.0
40
+ Requires-Dist: joserfc==1.6.5
41
+ Requires-Dist: jsonpatch==1.33
42
+ Requires-Dist: jsonpointer==3.1.1
43
+ Requires-Dist: jsonref==1.1.0
44
+ Requires-Dist: jsonschema==4.26.0
45
+ Requires-Dist: jsonschema-path==0.5.0
46
+ Requires-Dist: jsonschema-specifications==2025.9.1
47
+ Requires-Dist: keyring==25.7.0
48
+ Requires-Dist: langchain==1.3.1
49
+ Requires-Dist: langchain-core==1.4.0
50
+ Requires-Dist: langchain-protocol==0.0.15
51
+ Requires-Dist: langgraph==1.2.1
52
+ Requires-Dist: langgraph-checkpoint==4.1.0
53
+ Requires-Dist: langgraph-prebuilt==1.1.0
54
+ Requires-Dist: langgraph-sdk==0.3.14
55
+ Requires-Dist: langsmith==0.8.5
56
+ Requires-Dist: markdown-it-py==4.2.0
57
+ Requires-Dist: mcp==1.27.1
58
+ Requires-Dist: mdurl==0.1.2
59
+ Requires-Dist: more-itertools==11.0.2
60
+ Requires-Dist: nodejs-wheel-binaries==24.15.0
61
+ Requires-Dist: numpy==2.4.6
62
+ Requires-Dist: openapi-pydantic==0.5.1
63
+ Requires-Dist: opencv-python-headless==4.13.0.92
64
+ Requires-Dist: opentelemetry-api==1.42.1
65
+ Requires-Dist: orjson==3.11.9
66
+ Requires-Dist: ormsgpack==1.12.2
67
+ Requires-Dist: packaging==26.2
68
+ Requires-Dist: pathable==0.6.0
69
+ Requires-Dist: pillow==12.2.0
70
+ Requires-Dist: platformdirs==4.9.6
71
+ Requires-Dist: py-key-value-aio==0.4.4
72
+ Requires-Dist: pycparser==3.0
73
+ Requires-Dist: pydantic==2.13.4
74
+ Requires-Dist: pydantic-settings==2.14.1
75
+ Requires-Dist: pydantic_core==2.46.4
76
+ Requires-Dist: Pygments==2.20.0
77
+ Requires-Dist: PyJWT==2.13.0
78
+ Requires-Dist: pyperclip==1.11.0
79
+ Requires-Dist: python-dotenv==1.2.2
80
+ Requires-Dist: python-multipart==0.0.29
81
+ Requires-Dist: PyYAML==6.0.3
82
+ Requires-Dist: referencing==0.37.0
83
+ Requires-Dist: requests==2.34.2
84
+ Requires-Dist: requests-toolbelt==1.0.0
85
+ Requires-Dist: rich==15.0.0
86
+ Requires-Dist: rich-rst==2.0.1
87
+ Requires-Dist: rpds-py==0.30.0
88
+ Requires-Dist: SecretStorage==3.5.0
89
+ Requires-Dist: sse-starlette==3.4.4
90
+ Requires-Dist: starlette==1.0.1
91
+ Requires-Dist: tenacity==9.1.4
92
+ Requires-Dist: typing-inspection==0.4.2
93
+ Requires-Dist: typing_extensions==4.15.0
94
+ Requires-Dist: uncalled-for==0.3.2
95
+ Requires-Dist: urllib3==2.7.0
96
+ Requires-Dist: uuid_utils==0.16.0
97
+ Requires-Dist: uvicorn==0.47.0
98
+ Requires-Dist: watchfiles==1.2.0
99
+ Requires-Dist: websockets==16.0
100
+ Requires-Dist: xxhash==3.7.0
101
+ Requires-Dist: zstandard==0.25.0
102
+ Provides-Extra: dev
103
+ Requires-Dist: pytest; extra == "dev"
104
+ Requires-Dist: pytest-asyncio; extra == "dev"
105
+ Requires-Dist: flake8; extra == "dev"
106
+ Requires-Dist: basedpyright==1.39.5; extra == "dev"
107
+ Dynamic: license-file
108
+
109
+ # async-media-agents
110
+
111
+ Async Multi-Agent POC using LangGraph + FastMCP + real media processing.
112
+
113
+ ## Overview
114
+
115
+ A lightweight async multi-agent system that processes images, audio, and video via base64-encoded payloads. Uses LangGraph for workflow orchestration, FastMCP for tool registration, and real processing libraries (Pillow, OpenCV, stdlib `wave`).
116
+
117
+ ## Architecture
118
+
119
+ ```
120
+ User Request
121
+ |
122
+ SupervisorAgent
123
+ |
124
+ Fan-out (parallel conditional edge)
125
+ ┌──────────┼──────────┐
126
+ | | |
127
+ ImageAgent AudioAgent VideoAgent
128
+ | | |
129
+ Pillow stdlib wave OpenCV
130
+ processor processor processor
131
+ | | |
132
+ └──────────┼──────────┘
133
+ Fan-in to Aggregator
134
+ ```
135
+
136
+ ## Features
137
+
138
+ - ✅ **Real media processing** — Powered by Pillow (Images), OpenCV (Video), and stdlib `wave` (Audio).
139
+ - ✅ **Parallel execution** — High-performance fan-out/fan-in orchestration using LangGraph.
140
+ - ✅ **MCP Integration** — Model Context Protocol support via FastMCP for tool registration.
141
+ - ✅ **Robust error handling** — Graceful recovery from corrupt base64 or invalid formats.
142
+ - ✅ **Shared state** — Concurrent-safe state management via `Annotated` reducers.
143
+ - ✅ **Type Safety** — Comprehensive static analysis with `basedpyright`.
144
+
145
+ ## Requirements
146
+
147
+ ```text
148
+ langgraph
149
+ langchain
150
+ fastmcp
151
+ Pillow
152
+ opencv-python-headless
153
+ numpy
154
+ pytest
155
+ pytest-asyncio
156
+ ```
157
+
158
+ ## Setup
159
+
160
+ ### 1. Create and activate a virtual environment
161
+
162
+ ```bash
163
+ # Create the virtual environment
164
+ python -m venv .venv
165
+
166
+ # Activate it
167
+ source .venv/bin/activate # Linux / macOS
168
+ .venv\Scripts\activate # Windows (cmd)
169
+ ```
170
+
171
+ ### 2. Install dependencies
172
+
173
+ ```bash
174
+ pip install -r requirements.txt
175
+ ```
176
+
177
+ ## Project Structure
178
+
179
+ ```text
180
+ project/
181
+ ├── async_media_agents/
182
+ │ ├── agents/
183
+ │ │ ├── image_agent.py # Specialized image logic
184
+ │ │ ├── audio_agent.py # Specialized audio logic
185
+ │ │ ├── video_agent.py # Specialized video logic
186
+ │ │ └── supervisor.py # Orchestration & routing
187
+ │ │
188
+ │ ├── tools/
189
+ │ │ ├── mcp_server.py # FastMCP server registration
190
+ │ │ ├── mcp_client.py # Async client interface
191
+ │ │ └── processors/ # Pure processing logic (decoupled)
192
+ │ │ ├── image_processor.py # Pillow-based
193
+ │ │ ├── audio_processor.py # stdlib wave
194
+ │ │ └── video_processor.py # OpenCV-based
195
+ │ │
196
+ │ ├── graph/
197
+ │ │ └── workflow.py # LangGraph workflow definition
198
+ │ │
199
+ │ ├── state/
200
+ │ │ ├── state.py # Shared AgentState with reducers
201
+ │ │ └── shared_memory.py # Global shared context utilities
202
+ │ │
203
+ │ └── __init__.py
204
+
205
+ ├── scripts/
206
+ │ └── test_workflow.py # Integration & payload tests
207
+
208
+ ├── main.py # CLI demo & examples
209
+ ├── requirements.txt
210
+ ├── pyproject.toml # Project & Tool configuration
211
+ └── README.md
212
+ ```
213
+
214
+ ## Usage
215
+
216
+ ### Run the demo
217
+
218
+ ```bash
219
+ python main.py
220
+ ```
221
+
222
+ The demo showcases three core patterns:
223
+ 1. **Text-based routing** — Auto-detects task type from user natural language.
224
+ 2. **Parallel processing** — Simultaneously processes multiple media types via fan-out.
225
+ 3. **Supervisor workflow** — Uses the Supervisor agent to route to a single target.
226
+
227
+ ### Run tests
228
+
229
+ Tests can be executed directly or through `pytest`:
230
+
231
+ ```bash
232
+ # Direct execution (includes payload generation logs)
233
+ python scripts/test_workflow.py
234
+
235
+ # Using pytest
236
+ pytest scripts/test_workflow.py
237
+ ```
238
+
239
+ The test suite covers:
240
+ - Base64 payload integrity (PNG, WAV, MP4)
241
+ - Real metadata extraction via processors
242
+ - Parallel transport through the workflow
243
+ - Partial payload handling
244
+ - Robust error handling for corrupted data
245
+
246
+ ### Programmatic use
247
+
248
+ ```python
249
+ from async_media_agents.graph.workflow import Workflow
250
+
251
+ wf = Workflow()
252
+
253
+ # Text-based — auto-routes to image/audio/video agent (run inside an async function, e.g. via asyncio.run)
254
+ result = await wf.run("Analyze this image")
255
+
256
+ # Parallel — feeds base64 to all three agents
257
+ result = await wf.run_parallel(
258
+ image_data="base64_encoded_png",
259
+ audio_data="base64_encoded_wav",
260
+ video_data="base64_encoded_mp4",
261
+ )
262
+ ```
263
+
264
+ ## Processing Pipeline
265
+
266
+ | Media | Library | Extracted Metadata |
267
+ |---------|-----------------------|-------------------------------------------------------|
268
+ | Image | Pillow | width, height, format, mode, thumbnail |
269
+ | Audio | stdlib `wave` | duration, sample rate, channels, sample width, frames |
270
+ | Video | opencv-python-headless | duration, fps, resolution, frame count, dimensions |
271
+
272
+ All processors return structured dicts: `{"status": "success", "metadata": {...}}` or `{"status": "error", "error": "..."}`.
273
+
274
+ ## Static Analysis
275
+
276
+ The project uses `basedpyright` for type checking:
277
+
278
+ ```bash
279
+ pip install basedpyright
280
+ basedpyright .
281
+ ```
282
+
283
+ Configuration lives in `pyproject.toml` under `[tool.pyright]`.
284
+
285
+ ## State Management
286
+
287
+ `AgentState` extends `MessagesState` with custom fields. Fields written concurrently during parallel fan-out use `Annotated` reducers for safe merging:
288
+
289
+ - `results` — `_merge_results` dict merge
290
+ - `metadata` — `_merge_results` dict merge
291
+ - `current_agent` — `_last_wins`
292
+ - `error` — `_last_wins`
@@ -0,0 +1,184 @@
1
+ # async-media-agents
2
+
3
+ Async Multi-Agent POC using LangGraph + FastMCP + real media processing.
4
+
5
+ ## Overview
6
+
7
+ A lightweight async multi-agent system that processes images, audio, and video via base64-encoded payloads. Uses LangGraph for workflow orchestration, FastMCP for tool registration, and real processing libraries (Pillow, OpenCV, stdlib `wave`).
8
+
9
+ ## Architecture
10
+
11
+ ```
12
+ User Request
13
+ |
14
+ SupervisorAgent
15
+ |
16
+ Fan-out (parallel conditional edge)
17
+ ┌──────────┼──────────┐
18
+ | | |
19
+ ImageAgent AudioAgent VideoAgent
20
+ | | |
21
+ Pillow stdlib wave OpenCV
22
+ processor processor processor
23
+ | | |
24
+ └──────────┼──────────┘
25
+ Fan-in to Aggregator
26
+ ```
27
+
28
+ ## Features
29
+
30
+ - ✅ **Real media processing** — Powered by Pillow (Images), OpenCV (Video), and stdlib `wave` (Audio).
31
+ - ✅ **Parallel execution** — High-performance fan-out/fan-in orchestration using LangGraph.
32
+ - ✅ **MCP Integration** — Model Context Protocol support via FastMCP for tool registration.
33
+ - ✅ **Robust error handling** — Graceful recovery from corrupt base64 or invalid formats.
34
+ - ✅ **Shared state** — Concurrent-safe state management via `Annotated` reducers.
35
+ - ✅ **Type Safety** — Comprehensive static analysis with `basedpyright`.
36
+
37
+ ## Requirements
38
+
39
+ ```text
40
+ langgraph
41
+ langchain
42
+ fastmcp
43
+ Pillow
44
+ opencv-python-headless
45
+ numpy
46
+ pytest
47
+ pytest-asyncio
48
+ ```
49
+
50
+ ## Setup
51
+
52
+ ### 1. Create and activate a virtual environment
53
+
54
+ ```bash
55
+ # Create the virtual environment
56
+ python -m venv .venv
57
+
58
+ # Activate it
59
+ source .venv/bin/activate # Linux / macOS
60
+ .venv\Scripts\activate # Windows (cmd)
61
+ ```
62
+
63
+ ### 2. Install dependencies
64
+
65
+ ```bash
66
+ pip install -r requirements.txt
67
+ ```
68
+
69
+ ## Project Structure
70
+
71
+ ```text
72
+ project/
73
+ ├── async_media_agents/
74
+ │ ├── agents/
75
+ │ │ ├── image_agent.py # Specialized image logic
76
+ │ │ ├── audio_agent.py # Specialized audio logic
77
+ │ │ ├── video_agent.py # Specialized video logic
78
+ │ │ └── supervisor.py # Orchestration & routing
79
+ │ │
80
+ │ ├── tools/
81
+ │ │ ├── mcp_server.py # FastMCP server registration
82
+ │ │ ├── mcp_client.py # Async client interface
83
+ │ │ └── processors/ # Pure processing logic (decoupled)
84
+ │ │ ├── image_processor.py # Pillow-based
85
+ │ │ ├── audio_processor.py # stdlib wave
86
+ │ │ └── video_processor.py # OpenCV-based
87
+ │ │
88
+ │ ├── graph/
89
+ │ │ └── workflow.py # LangGraph workflow definition
90
+ │ │
91
+ │ ├── state/
92
+ │ │ ├── state.py # Shared AgentState with reducers
93
+ │ │ └── shared_memory.py # Global shared context utilities
94
+ │ │
95
+ │ └── __init__.py
96
+
97
+ ├── scripts/
98
+ │ └── test_workflow.py # Integration & payload tests
99
+
100
+ ├── main.py # CLI demo & examples
101
+ ├── requirements.txt
102
+ ├── pyproject.toml # Project & Tool configuration
103
+ └── README.md
104
+ ```
105
+
106
+ ## Usage
107
+
108
+ ### Run the demo
109
+
110
+ ```bash
111
+ python main.py
112
+ ```
113
+
114
+ The demo showcases three core patterns:
115
+ 1. **Text-based routing** — Auto-detects task type from user natural language.
116
+ 2. **Parallel processing** — Simultaneously processes multiple media types via fan-out.
117
+ 3. **Supervisor workflow** — Uses the Supervisor agent to route to a single target.
118
+
119
+ ### Run tests
120
+
121
+ Tests can be executed directly or through `pytest`:
122
+
123
+ ```bash
124
+ # Direct execution (includes payload generation logs)
125
+ python scripts/test_workflow.py
126
+
127
+ # Using pytest
128
+ pytest scripts/test_workflow.py
129
+ ```
130
+
131
+ The test suite covers:
132
+ - Base64 payload integrity (PNG, WAV, MP4)
133
+ - Real metadata extraction via processors
134
+ - Parallel transport through the workflow
135
+ - Partial payload handling
136
+ - Robust error handling for corrupted data
137
+
138
+ ### Programmatic use
139
+
140
+ ```python
141
+ from async_media_agents.graph.workflow import Workflow
142
+
143
+ wf = Workflow()
144
+
145
+ # Text-based — auto-routes to image/audio/video agent (run inside an async function, e.g. via asyncio.run)
146
+ result = await wf.run("Analyze this image")
147
+
148
+ # Parallel — feeds base64 to all three agents
149
+ result = await wf.run_parallel(
150
+ image_data="base64_encoded_png",
151
+ audio_data="base64_encoded_wav",
152
+ video_data="base64_encoded_mp4",
153
+ )
154
+ ```
155
+
156
+ ## Processing Pipeline
157
+
158
+ | Media | Library | Extracted Metadata |
159
+ |---------|-----------------------|-------------------------------------------------------|
160
+ | Image | Pillow | width, height, format, mode, thumbnail |
161
+ | Audio | stdlib `wave` | duration, sample rate, channels, sample width, frames |
162
+ | Video | opencv-python-headless | duration, fps, resolution, frame count, dimensions |
163
+
164
+ All processors return structured dicts: `{"status": "success", "metadata": {...}}` or `{"status": "error", "error": "..."}`.
165
+
166
+ ## Static Analysis
167
+
168
+ The project uses `basedpyright` for type checking:
169
+
170
+ ```bash
171
+ pip install basedpyright
172
+ basedpyright .
173
+ ```
174
+
175
+ Configuration lives in `pyproject.toml` under `[tool.pyright]`.
176
+
177
+ ## State Management
178
+
179
+ `AgentState` extends `MessagesState` with custom fields. Fields written concurrently during parallel fan-out use `Annotated` reducers for safe merging:
180
+
181
+ - `results` — `_merge_results` dict merge
182
+ - `metadata` — `_merge_results` dict merge
183
+ - `current_agent` — `_last_wins`
184
+ - `error` — `_last_wins`
@@ -0,0 +1,7 @@
1
+ # Async Multi-Agent System Package
2
+ """Multi-agent orchestration system using LangGraph and FastMCP."""
3
+
4
+ from async_media_agents.state.state import AgentState
5
+ from async_media_agents.utils.requirements import update_requirements
6
+
7
+ __all__ = ["AgentState", "update_requirements"]
@@ -0,0 +1,13 @@
1
+ """Multi-agent system package."""
2
+
3
+ from async_media_agents.agents.audio_agent import AudioAgent
4
+ from async_media_agents.agents.image_agent import ImageAgent
5
+ from async_media_agents.agents.supervisor import SupervisorAgent
6
+ from async_media_agents.agents.video_agent import VideoAgent
7
+
8
+ __all__ = [
9
+ "ImageAgent",
10
+ "AudioAgent",
11
+ "VideoAgent",
12
+ "SupervisorAgent",
13
+ ]
@@ -0,0 +1,78 @@
1
+ """Audio processing agent."""
2
+
3
+ from typing import Any
4
+
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+
7
+ from async_media_agents.state.state import AgentState
8
+
9
+
10
class AudioAgent:
    """Agent specialized in audio processing.

    Hands audio payload strings to the shared MCP client and packages the
    outcome either as a partial state update (``run``) or as a standalone
    result dict (``process``).
    """

    # System message used as the first entry of ``self.prompt``.
    system_prompt: str = """You are an audio processing agent.

Your role is to analyze and process audio based on user requests.
You can:
- Extract audio metadata (duration, sample rate, etc.)
- Detect audio content (speech, music, noise)
- Identify audio characteristics (volume, pitch)
- Process audio streams

Always use the available tools for processing audio.
"""

    def __init__(self) -> None:
        """Set the agent name and build its system/user chat prompt."""
        self.name: str = "audio_agent"
        self.prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("user", "{input}"),
            ]
        )

    async def process_audio(self, audio_data: str, format: str = "wav") -> str:
        """Send audio to the MCP client and flatten its reply.

        Args:
            audio_data: Audio payload forwarded unchanged to the client.
            format: Audio format label forwarded to the client.

        Returns:
            ``"Error: <message>"`` when the client reports ``status ==
            "error"``; otherwise the value stored under ``"result"``, or
            ``"No result returned"`` when that key is absent.
        """
        # Imported at call time rather than at module import time.
        # NOTE(review): presumably avoids a circular import -- confirm.
        from async_media_agents.tools.mcp_client import mcp_client

        result = await mcp_client.process_audio(audio_data, format)
        if result.get("status") == "error":
            return f"Error: {result.get('error', 'unknown error')}"
        return result.get("result", "No result returned")

    async def extract_info(self, audio_data: str) -> dict[str, Any]:
        """Return the MCP client's raw reply for ``audio_data`` as WAV."""
        from async_media_agents.tools.mcp_client import mcp_client

        return await mcp_client.process_audio(audio_data, "wav")

    async def run(self, state: AgentState) -> dict[str, Any]:
        """Run the audio agent against the workflow state.

        Args:
            state: Workflow state; ``state["task_data"]["audio_data"]`` is
                read when present.

        Returns:
            A dict with ``current_agent`` and a ``results`` entry keyed by
            ``"audio_agent"``. ``processing_result`` is added to that entry
            only when a non-empty audio payload was supplied.
        """
        # ``or {}`` also covers a ``task_data`` key that is present but None,
        # which would otherwise fail on the ``.get`` below.
        task_data = state.get("task_data") or {}
        audio_data = task_data.get("audio_data", "")

        result: dict[str, Any] = {
            "current_agent": self.name,
            "results": {
                "audio_agent": {
                    "status": "completed",
                    "task_type": "audio",
                },
            },
        }

        if audio_data:
            processed = await self.process_audio(audio_data, "wav")
            result["results"]["audio_agent"]["processing_result"] = processed

        return result

    async def process(self, audio_data: str) -> dict[str, Any]:
        """Process audio directly, outside of the workflow state.

        Returns:
            A dict with ``status`` (``"error"`` when the processing result is
            a string starting with ``"Error"``, else ``"success"``), the
            agent name under ``agent`` and the processing result under
            ``result``.
        """
        result = await self.process_audio(audio_data, "wav")
        # The client may hand back a non-string payload under "result";
        # only strings can carry the "Error" prefix produced above.
        failed = isinstance(result, str) and result.startswith("Error")
        return {
            "status": "error" if failed else "success",
            "agent": self.name,
            "result": result,
        }
@@ -0,0 +1,79 @@
1
+ """Image processing agent."""
2
+
3
+ from typing import Any
4
+
5
+ from langchain_core.prompts import ChatPromptTemplate
6
+
7
+ from async_media_agents.state.state import AgentState
8
+
9
+
10
class ImageAgent:
    """Agent specialized in image processing.

    Hands image payload strings to the shared MCP client and packages the
    outcome either as a partial state update (``run``) or as a standalone
    result dict (``process``).
    """

    # System message used as the first entry of ``self.prompt``.
    system_prompt: str = """You are an image processing agent.

Your role is to analyze and process images based on user requests.
You can:
- Extract image metadata
- Identify objects, scenes, or people in images
- Detect image properties (size, format, colors)
- Apply filters or transformations

Always use the available tools for processing images.
"""

    def __init__(self) -> None:
        """Set the agent name and build its system/user chat prompt."""
        self.name: str = "image_agent"
        self.prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("user", "{input}"),
            ]
        )

    async def process_image(self, image_data: str, format: str = "png") -> str:
        """Send an image to the MCP client and flatten its reply.

        Args:
            image_data: Image payload forwarded unchanged to the client.
            format: Image format label forwarded to the client.

        Returns:
            ``"Error: <message>"`` when the client reports ``status ==
            "error"``; otherwise the value stored under ``"result"``, or
            ``"No result returned"`` when that key is absent.
        """
        # Imported at call time rather than at module import time.
        # NOTE(review): presumably avoids a circular import -- confirm.
        from async_media_agents.tools.mcp_client import mcp_client

        result = await mcp_client.process_image(image_data, format)
        if result.get("status") == "error":
            return f"Error: {result.get('error', 'unknown error')}"
        return result.get("result", "No result returned")

    async def extract_info(self, image_data: str) -> dict[str, Any]:
        """Return the MCP client's raw reply for ``image_data`` as PNG."""
        from async_media_agents.tools.mcp_client import mcp_client

        return await mcp_client.process_image(image_data, "png")

    async def run(self, state: AgentState) -> dict[str, Any]:
        """Run the image agent against the workflow state.

        Args:
            state: Workflow state; ``state["task_data"]["image_data"]`` is
                read when present.

        Returns:
            A dict with ``current_agent`` and a ``results`` entry keyed by
            ``"image_agent"``. ``processing_result`` is added to that entry
            only when a non-empty image payload was supplied.
        """
        # ``or {}`` also covers a ``task_data`` key that is present but None,
        # which would otherwise fail on the ``.get`` below.
        task_data = state.get("task_data") or {}

        # Get image data from task
        image_data = task_data.get("image_data", "")

        result: dict[str, Any] = {
            "current_agent": self.name,
            "results": {
                "image_agent": {
                    "status": "completed",
                    "task_type": "image",
                },
            },
        }

        if image_data:
            processed = await self.process_image(image_data, "png")
            result["results"]["image_agent"]["processing_result"] = processed

        return result

    async def process(self, image_data: str) -> dict[str, Any]:
        """Process an image directly, outside of the workflow state.

        Returns:
            A dict with ``status`` (``"error"`` when the processing result is
            a string starting with ``"Error"``, else ``"success"``), the
            agent name under ``agent`` and the processing result under
            ``result``.
        """
        result = await self.process_image(image_data, "png")
        # The client may hand back a non-string payload under "result";
        # only strings can carry the "Error" prefix produced above.
        failed = isinstance(result, str) and result.startswith("Error")
        return {
            "status": "error" if failed else "success",
            "agent": self.name,
            "result": result,
        }