ambivo-agents 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ambivo_agents/__init__.py +89 -0
- ambivo_agents/agents/__init__.py +19 -0
- ambivo_agents/agents/assistant.py +79 -0
- ambivo_agents/agents/code_executor.py +133 -0
- ambivo_agents/agents/knowledge_base.py +595 -0
- ambivo_agents/agents/media_editor.py +777 -0
- ambivo_agents/agents/simple_web_search.py +404 -0
- ambivo_agents/agents/web_scraper.py +682 -0
- ambivo_agents/agents/web_search.py +660 -0
- ambivo_agents/agents/youtube_download.py +553 -0
- ambivo_agents/cli.py +1871 -0
- ambivo_agents/config/__init__.py +4 -0
- ambivo_agents/config/loader.py +301 -0
- ambivo_agents/core/__init__.py +33 -0
- ambivo_agents/core/base.py +880 -0
- ambivo_agents/core/llm.py +333 -0
- ambivo_agents/core/memory.py +640 -0
- ambivo_agents/executors/__init__.py +8 -0
- ambivo_agents/executors/docker_executor.py +108 -0
- ambivo_agents/executors/media_executor.py +237 -0
- ambivo_agents/executors/youtube_executor.py +404 -0
- ambivo_agents/services/__init__.py +6 -0
- ambivo_agents/services/agent_service.py +590 -0
- ambivo_agents/services/factory.py +366 -0
- ambivo_agents-1.3.3.dist-info/METADATA +773 -0
- ambivo_agents-1.3.3.dist-info/RECORD +30 -0
- ambivo_agents-1.3.3.dist-info/WHEEL +5 -0
- ambivo_agents-1.3.3.dist-info/entry_points.txt +3 -0
- ambivo_agents-1.3.3.dist-info/licenses/LICENSE +21 -0
- ambivo_agents-1.3.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,777 @@
|
|
1
|
+
# ambivo_agents/agents/media_editor.py
|
2
|
+
"""
|
3
|
+
Media Editor Agent with FFmpeg Integration
|
4
|
+
Handles audio/video processing using Docker containers with ffmpeg
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
import json
|
9
|
+
import uuid
|
10
|
+
import time
|
11
|
+
import tempfile
|
12
|
+
import shutil
|
13
|
+
import os
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Dict, List, Any, Optional, Union
|
16
|
+
from datetime import datetime, timedelta
|
17
|
+
|
18
|
+
from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
|
19
|
+
from ..config.loader import load_config, get_config_section
|
20
|
+
from ..executors.media_executor import MediaDockerExecutor
|
21
|
+
|
22
|
+
|
23
|
+
class MediaEditorAgent(BaseAgent):
|
24
|
+
"""Media Editor Agent for audio/video processing using FFmpeg"""
|
25
|
+
|
26
|
+
def __init__(self, agent_id: str | None = None, memory_manager=None, llm_service=None, **kwargs):
    """Create the media editor agent and register its FFmpeg tools.

    Args:
        agent_id: Identifier for this agent. When ``None`` an id of the
            form ``media_editor_<8 chars of a uuid4>`` is generated.
        memory_manager: Forwarded to ``BaseAgent.__init__``.
        llm_service: Forwarded to ``BaseAgent.__init__``.
        **kwargs: Extra keyword arguments forwarded to ``BaseAgent.__init__``.

    Raises:
        ValueError: If loading the ``media_editor`` configuration section
            fails for any reason; the underlying exception is chained as
            ``__cause__``.
    """
    if agent_id is None:
        agent_id = f"media_editor_{str(uuid.uuid4())[:8]}"

    super().__init__(
        agent_id=agent_id,
        role=AgentRole.CODE_EXECUTOR,  # Using CODE_EXECUTOR role for media processing
        memory_manager=memory_manager,
        llm_service=llm_service,
        name="Media Editor Agent",
        description="Agent for audio/video processing, transcoding, and editing using FFmpeg",
        **kwargs
    )

    # Load media configuration from YAML
    try:
        config = load_config()
        self.media_config = get_config_section('media_editor', config)
    except Exception as e:
        # Chain the original error so the real cause (missing file, parse
        # error, missing section, ...) stays visible in the traceback.
        raise ValueError(f"media_editor configuration not found in agent_config.yaml: {e}") from e

    # Initialize media Docker executor
    self.media_executor = MediaDockerExecutor(self.media_config)

    # Add media processing tools
    self._add_media_tools()
|
52
|
+
|
53
|
+
def _add_media_tools(self):
    """Register every media-processing tool this agent exposes.

    Each entry below gives the tool name, its description, the method
    that implements it, the JSON-schema ``properties`` of its arguments
    and the list of required argument names. Entries are wrapped in an
    ``AgentTool`` and handed to ``self.add_tool`` in the order listed.
    """
    tool_specs = [
        (
            "extract_audio_from_video",
            "Extract audio track from video file",
            self._extract_audio_from_video,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "output_format": {"type": "string", "enum": ["mp3", "wav", "aac", "flac"], "default": "mp3"},
                "audio_quality": {"type": "string", "enum": ["high", "medium", "low"], "default": "medium"},
            },
            ["input_video"],
        ),
        (
            "convert_video_format",
            "Convert video to different format/codec",
            self._convert_video_format,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "output_format": {"type": "string", "enum": ["mp4", "avi", "mov", "mkv", "webm"], "default": "mp4"},
                "video_codec": {"type": "string", "enum": ["h264", "h265", "vp9", "copy"], "default": "h264"},
                "audio_codec": {"type": "string", "enum": ["aac", "mp3", "opus", "copy"], "default": "aac"},
                "crf": {"type": "integer", "minimum": 0, "maximum": 51, "default": 23},
            },
            ["input_video"],
        ),
        (
            "resize_video",
            "Resize video to specific dimensions",
            self._resize_video,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "width": {"type": "integer", "description": "Target width in pixels"},
                "height": {"type": "integer", "description": "Target height in pixels"},
                "maintain_aspect": {"type": "boolean", "default": True},
                "preset": {"type": "string", "enum": ["720p", "1080p", "4k", "480p", "custom"], "default": "custom"},
            },
            ["input_video"],
        ),
        (
            "get_media_info",
            "Get detailed information about media file",
            self._get_media_info,
            {
                "file_path": {"type": "string", "description": "Path to media file"},
            },
            ["file_path"],
        ),
        (
            "trim_media",
            "Trim/cut media file to specific time range",
            self._trim_media,
            {
                "input_file": {"type": "string", "description": "Path to input media file"},
                "start_time": {"type": "string", "description": "Start time (HH:MM:SS or seconds)"},
                "duration": {"type": "string", "description": "Duration (HH:MM:SS or seconds)"},
                "end_time": {"type": "string", "description": "End time (alternative to duration)"},
            },
            ["input_file", "start_time"],
        ),
        (
            "create_video_thumbnail",
            "Extract thumbnail/frame from video",
            self._create_video_thumbnail,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "timestamp": {"type": "string", "description": "Time to extract frame (HH:MM:SS)",
                              "default": "00:00:05"},
                "output_format": {"type": "string", "enum": ["jpg", "png", "bmp"], "default": "jpg"},
                "width": {"type": "integer", "description": "Thumbnail width", "default": 320},
            },
            ["input_video"],
        ),
        (
            "merge_audio_video",
            "Combine separate audio and video files",
            self._merge_audio_video,
            {
                "video_file": {"type": "string", "description": "Path to video file"},
                "audio_file": {"type": "string", "description": "Path to audio file"},
                "output_format": {"type": "string", "enum": ["mp4", "mkv", "avi"], "default": "mp4"},
            },
            ["video_file", "audio_file"],
        ),
        (
            "adjust_audio_volume",
            "Adjust audio volume/gain",
            self._adjust_audio_volume,
            {
                "input_file": {"type": "string", "description": "Path to audio/video file"},
                "volume_change": {"type": "string", "description": "Volume change (+10dB, -5dB, 0.5, 2.0)"},
                "normalize": {"type": "boolean", "description": "Normalize audio levels", "default": False},
            },
            ["input_file", "volume_change"],
        ),
        (
            "convert_audio_format",
            "Convert audio to different format",
            self._convert_audio_format,
            {
                "input_audio": {"type": "string", "description": "Path to input audio file"},
                "output_format": {"type": "string", "enum": ["mp3", "wav", "aac", "flac", "ogg"], "default": "mp3"},
                "bitrate": {"type": "string", "description": "Audio bitrate (128k, 192k, 320k)", "default": "192k"},
                "sample_rate": {"type": "integer", "description": "Sample rate (44100, 48000)", "default": 44100},
            },
            ["input_audio"],
        ),
    ]

    for tool_name, summary, handler, properties, required in tool_specs:
        self.add_tool(AgentTool(
            name=tool_name,
            description=summary,
            function=handler,
            parameters_schema={
                "type": "object",
                "properties": properties,
                "required": required,
            },
        ))
|
205
|
+
|
206
|
+
async def _extract_audio_from_video(self, input_video: str, output_format: str = "mp3",
|
207
|
+
audio_quality: str = "medium") -> Dict[str, Any]:
|
208
|
+
"""Extract audio from video file"""
|
209
|
+
try:
|
210
|
+
if not Path(input_video).exists():
|
211
|
+
return {"success": False, "error": f"Input video file not found: {input_video}"}
|
212
|
+
|
213
|
+
# Quality settings
|
214
|
+
quality_settings = {
|
215
|
+
"low": "-b:a 128k",
|
216
|
+
"medium": "-b:a 192k",
|
217
|
+
"high": "-b:a 320k"
|
218
|
+
}
|
219
|
+
|
220
|
+
output_filename = f"extracted_audio_{int(time.time())}.{output_format}"
|
221
|
+
|
222
|
+
ffmpeg_command = (
|
223
|
+
f"ffmpeg -i ${{input_video}} "
|
224
|
+
f"{quality_settings.get(audio_quality, quality_settings['medium'])} "
|
225
|
+
f"-vn -acodec {self._get_audio_codec(output_format)} "
|
226
|
+
f"${{OUTPUT}}"
|
227
|
+
)
|
228
|
+
|
229
|
+
result = self.media_executor.execute_ffmpeg_command(
|
230
|
+
ffmpeg_command=ffmpeg_command,
|
231
|
+
input_files={'input_video': input_video},
|
232
|
+
output_filename=output_filename
|
233
|
+
)
|
234
|
+
|
235
|
+
if result['success']:
|
236
|
+
return {
|
237
|
+
"success": True,
|
238
|
+
"message": f"Audio extracted successfully to {output_format}",
|
239
|
+
"output_file": result['output_file'],
|
240
|
+
"input_video": input_video,
|
241
|
+
"execution_time": result['execution_time']
|
242
|
+
}
|
243
|
+
else:
|
244
|
+
return result
|
245
|
+
|
246
|
+
except Exception as e:
|
247
|
+
return {"success": False, "error": str(e)}
|
248
|
+
|
249
|
+
async def _convert_video_format(self, input_video: str, output_format: str = "mp4",
|
250
|
+
video_codec: str = "h264", audio_codec: str = "aac",
|
251
|
+
crf: int = 23) -> Dict[str, Any]:
|
252
|
+
"""Convert video to different format"""
|
253
|
+
try:
|
254
|
+
if not Path(input_video).exists():
|
255
|
+
return {"success": False, "error": f"Input video file not found: {input_video}"}
|
256
|
+
|
257
|
+
output_filename = f"converted_video_{int(time.time())}.{output_format}"
|
258
|
+
|
259
|
+
# Build codec parameters
|
260
|
+
video_params = f"-c:v {video_codec}" if video_codec != "copy" else "-c:v copy"
|
261
|
+
audio_params = f"-c:a {audio_codec}" if audio_codec != "copy" else "-c:a copy"
|
262
|
+
|
263
|
+
if video_codec in ["h264", "h265"] and video_codec != "copy":
|
264
|
+
video_params += f" -crf {crf}"
|
265
|
+
|
266
|
+
ffmpeg_command = (
|
267
|
+
f"ffmpeg -i ${{input_video}} "
|
268
|
+
f"{video_params} {audio_params} "
|
269
|
+
f"-preset medium "
|
270
|
+
f"${{OUTPUT}}"
|
271
|
+
)
|
272
|
+
|
273
|
+
result = self.media_executor.execute_ffmpeg_command(
|
274
|
+
ffmpeg_command=ffmpeg_command,
|
275
|
+
input_files={'input_video': input_video},
|
276
|
+
output_filename=output_filename
|
277
|
+
)
|
278
|
+
|
279
|
+
if result['success']:
|
280
|
+
return {
|
281
|
+
"success": True,
|
282
|
+
"message": f"Video converted successfully to {output_format}",
|
283
|
+
"output_file": result['output_file'],
|
284
|
+
"input_video": input_video,
|
285
|
+
"conversion_settings": {
|
286
|
+
"output_format": output_format,
|
287
|
+
"video_codec": video_codec,
|
288
|
+
"audio_codec": audio_codec,
|
289
|
+
"crf": crf
|
290
|
+
},
|
291
|
+
"execution_time": result['execution_time']
|
292
|
+
}
|
293
|
+
else:
|
294
|
+
return result
|
295
|
+
|
296
|
+
except Exception as e:
|
297
|
+
return {"success": False, "error": str(e)}
|
298
|
+
|
299
|
+
async def _resize_video(self, input_video: str, width: int = None, height: int = None,
|
300
|
+
maintain_aspect: bool = True, preset: str = "custom") -> Dict[str, Any]:
|
301
|
+
"""Resize video to specific dimensions"""
|
302
|
+
try:
|
303
|
+
if not Path(input_video).exists():
|
304
|
+
return {"success": False, "error": f"Input video file not found: {input_video}"}
|
305
|
+
|
306
|
+
# Handle presets
|
307
|
+
if preset != "custom":
|
308
|
+
preset_dimensions = {
|
309
|
+
"480p": (854, 480),
|
310
|
+
"720p": (1280, 720),
|
311
|
+
"1080p": (1920, 1080),
|
312
|
+
"4k": (3840, 2160)
|
313
|
+
}
|
314
|
+
if preset in preset_dimensions:
|
315
|
+
width, height = preset_dimensions[preset]
|
316
|
+
|
317
|
+
if not width or not height:
|
318
|
+
return {"success": False, "error": "Width and height must be specified"}
|
319
|
+
|
320
|
+
output_filename = f"resized_video_{width}x{height}_{int(time.time())}.mp4"
|
321
|
+
|
322
|
+
# Scale filter with aspect ratio handling
|
323
|
+
if maintain_aspect:
|
324
|
+
scale_filter = f"scale={width}:{height}:force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2"
|
325
|
+
else:
|
326
|
+
scale_filter = f"scale={width}:{height}"
|
327
|
+
|
328
|
+
ffmpeg_command = (
|
329
|
+
f"ffmpeg -i ${{input_video}} "
|
330
|
+
f"-vf \"{scale_filter}\" "
|
331
|
+
f"-c:a copy "
|
332
|
+
f"${{OUTPUT}}"
|
333
|
+
)
|
334
|
+
|
335
|
+
result = self.media_executor.execute_ffmpeg_command(
|
336
|
+
ffmpeg_command=ffmpeg_command,
|
337
|
+
input_files={'input_video': input_video},
|
338
|
+
output_filename=output_filename
|
339
|
+
)
|
340
|
+
|
341
|
+
if result['success']:
|
342
|
+
return {
|
343
|
+
"success": True,
|
344
|
+
"message": f"Video resized successfully to {width}x{height}",
|
345
|
+
"output_file": result['output_file'],
|
346
|
+
"input_video": input_video,
|
347
|
+
"resize_settings": {
|
348
|
+
"width": width,
|
349
|
+
"height": height,
|
350
|
+
"maintain_aspect": maintain_aspect,
|
351
|
+
"preset": preset
|
352
|
+
},
|
353
|
+
"execution_time": result['execution_time']
|
354
|
+
}
|
355
|
+
else:
|
356
|
+
return result
|
357
|
+
|
358
|
+
except Exception as e:
|
359
|
+
return {"success": False, "error": str(e)}
|
360
|
+
|
361
|
+
async def _get_media_info(self, file_path: str) -> Dict[str, Any]:
|
362
|
+
"""Get detailed media file information"""
|
363
|
+
try:
|
364
|
+
result = self.media_executor.get_media_info(file_path)
|
365
|
+
|
366
|
+
if result['success']:
|
367
|
+
return {
|
368
|
+
"success": True,
|
369
|
+
"message": "Media information retrieved successfully",
|
370
|
+
"file_path": file_path,
|
371
|
+
"media_info": result.get('media_info', {}),
|
372
|
+
"raw_output": result.get('raw_output', '')
|
373
|
+
}
|
374
|
+
else:
|
375
|
+
return result
|
376
|
+
|
377
|
+
except Exception as e:
|
378
|
+
return {"success": False, "error": str(e)}
|
379
|
+
|
380
|
+
async def _trim_media(self, input_file: str, start_time: str,
|
381
|
+
duration: str = None, end_time: str = None) -> Dict[str, Any]:
|
382
|
+
"""Trim media file to specific time range"""
|
383
|
+
try:
|
384
|
+
if not Path(input_file).exists():
|
385
|
+
return {"success": False, "error": f"Input file not found: {input_file}"}
|
386
|
+
|
387
|
+
if not duration and not end_time:
|
388
|
+
return {"success": False, "error": "Either duration or end_time must be specified"}
|
389
|
+
|
390
|
+
file_ext = Path(input_file).suffix
|
391
|
+
output_filename = f"trimmed_media_{int(time.time())}{file_ext}"
|
392
|
+
|
393
|
+
# Build time parameters
|
394
|
+
time_params = f"-ss {start_time}"
|
395
|
+
if duration:
|
396
|
+
time_params += f" -t {duration}"
|
397
|
+
elif end_time:
|
398
|
+
time_params += f" -to {end_time}"
|
399
|
+
|
400
|
+
ffmpeg_command = (
|
401
|
+
f"ffmpeg -i ${{input_file}} "
|
402
|
+
f"{time_params} "
|
403
|
+
f"-c copy "
|
404
|
+
f"${{OUTPUT}}"
|
405
|
+
)
|
406
|
+
|
407
|
+
result = self.media_executor.execute_ffmpeg_command(
|
408
|
+
ffmpeg_command=ffmpeg_command,
|
409
|
+
input_files={'input_file': input_file},
|
410
|
+
output_filename=output_filename
|
411
|
+
)
|
412
|
+
|
413
|
+
if result['success']:
|
414
|
+
return {
|
415
|
+
"success": True,
|
416
|
+
"message": f"Media trimmed successfully",
|
417
|
+
"output_file": result['output_file'],
|
418
|
+
"input_file": input_file,
|
419
|
+
"trim_settings": {
|
420
|
+
"start_time": start_time,
|
421
|
+
"duration": duration,
|
422
|
+
"end_time": end_time
|
423
|
+
},
|
424
|
+
"execution_time": result['execution_time']
|
425
|
+
}
|
426
|
+
else:
|
427
|
+
return result
|
428
|
+
|
429
|
+
except Exception as e:
|
430
|
+
return {"success": False, "error": str(e)}
|
431
|
+
|
432
|
+
async def _create_video_thumbnail(self, input_video: str, timestamp: str = "00:00:05",
|
433
|
+
output_format: str = "jpg", width: int = 320) -> Dict[str, Any]:
|
434
|
+
"""Create thumbnail from video"""
|
435
|
+
try:
|
436
|
+
if not Path(input_video).exists():
|
437
|
+
return {"success": False, "error": f"Input video file not found: {input_video}"}
|
438
|
+
|
439
|
+
output_filename = f"thumbnail_{int(time.time())}.{output_format}"
|
440
|
+
|
441
|
+
ffmpeg_command = (
|
442
|
+
f"ffmpeg -i ${{input_video}} "
|
443
|
+
f"-ss {timestamp} "
|
444
|
+
f"-vframes 1 "
|
445
|
+
f"-vf scale={width}:-1 "
|
446
|
+
f"${{OUTPUT}}"
|
447
|
+
)
|
448
|
+
|
449
|
+
result = self.media_executor.execute_ffmpeg_command(
|
450
|
+
ffmpeg_command=ffmpeg_command,
|
451
|
+
input_files={'input_video': input_video},
|
452
|
+
output_filename=output_filename
|
453
|
+
)
|
454
|
+
|
455
|
+
if result['success']:
|
456
|
+
return {
|
457
|
+
"success": True,
|
458
|
+
"message": f"Thumbnail created successfully",
|
459
|
+
"output_file": result['output_file'],
|
460
|
+
"input_video": input_video,
|
461
|
+
"thumbnail_settings": {
|
462
|
+
"timestamp": timestamp,
|
463
|
+
"output_format": output_format,
|
464
|
+
"width": width
|
465
|
+
},
|
466
|
+
"execution_time": result['execution_time']
|
467
|
+
}
|
468
|
+
else:
|
469
|
+
return result
|
470
|
+
|
471
|
+
except Exception as e:
|
472
|
+
return {"success": False, "error": str(e)}
|
473
|
+
|
474
|
+
async def _merge_audio_video(self, video_file: str, audio_file: str,
|
475
|
+
output_format: str = "mp4") -> Dict[str, Any]:
|
476
|
+
"""Merge separate audio and video files"""
|
477
|
+
try:
|
478
|
+
if not Path(video_file).exists():
|
479
|
+
return {"success": False, "error": f"Video file not found: {video_file}"}
|
480
|
+
if not Path(audio_file).exists():
|
481
|
+
return {"success": False, "error": f"Audio file not found: {audio_file}"}
|
482
|
+
|
483
|
+
output_filename = f"merged_av_{int(time.time())}.{output_format}"
|
484
|
+
|
485
|
+
ffmpeg_command = (
|
486
|
+
f"ffmpeg -i ${{video_file}} -i ${{audio_file}} "
|
487
|
+
f"-c:v copy -c:a aac "
|
488
|
+
f"-shortest "
|
489
|
+
f"${{OUTPUT}}"
|
490
|
+
)
|
491
|
+
|
492
|
+
result = self.media_executor.execute_ffmpeg_command(
|
493
|
+
ffmpeg_command=ffmpeg_command,
|
494
|
+
input_files={'video_file': video_file, 'audio_file': audio_file},
|
495
|
+
output_filename=output_filename
|
496
|
+
)
|
497
|
+
|
498
|
+
if result['success']:
|
499
|
+
return {
|
500
|
+
"success": True,
|
501
|
+
"message": f"Audio and video merged successfully",
|
502
|
+
"output_file": result['output_file'],
|
503
|
+
"input_files": {
|
504
|
+
"video": video_file,
|
505
|
+
"audio": audio_file
|
506
|
+
},
|
507
|
+
"execution_time": result['execution_time']
|
508
|
+
}
|
509
|
+
else:
|
510
|
+
return result
|
511
|
+
|
512
|
+
except Exception as e:
|
513
|
+
return {"success": False, "error": str(e)}
|
514
|
+
|
515
|
+
async def _adjust_audio_volume(self, input_file: str, volume_change: str,
|
516
|
+
normalize: bool = False) -> Dict[str, Any]:
|
517
|
+
"""Adjust audio volume"""
|
518
|
+
try:
|
519
|
+
if not Path(input_file).exists():
|
520
|
+
return {"success": False, "error": f"Input file not found: {input_file}"}
|
521
|
+
|
522
|
+
file_ext = Path(input_file).suffix
|
523
|
+
output_filename = f"volume_adjusted_{int(time.time())}{file_ext}"
|
524
|
+
|
525
|
+
# Build audio filter
|
526
|
+
if normalize:
|
527
|
+
audio_filter = f"loudnorm,volume={volume_change}"
|
528
|
+
else:
|
529
|
+
audio_filter = f"volume={volume_change}"
|
530
|
+
|
531
|
+
ffmpeg_command = (
|
532
|
+
f"ffmpeg -i ${{input_file}} "
|
533
|
+
f"-af \"{audio_filter}\" "
|
534
|
+
f"-c:v copy "
|
535
|
+
f"${{OUTPUT}}"
|
536
|
+
)
|
537
|
+
|
538
|
+
result = self.media_executor.execute_ffmpeg_command(
|
539
|
+
ffmpeg_command=ffmpeg_command,
|
540
|
+
input_files={'input_file': input_file},
|
541
|
+
output_filename=output_filename
|
542
|
+
)
|
543
|
+
|
544
|
+
if result['success']:
|
545
|
+
return {
|
546
|
+
"success": True,
|
547
|
+
"message": f"Audio volume adjusted successfully",
|
548
|
+
"output_file": result['output_file'],
|
549
|
+
"input_file": input_file,
|
550
|
+
"volume_settings": {
|
551
|
+
"volume_change": volume_change,
|
552
|
+
"normalize": normalize
|
553
|
+
},
|
554
|
+
"execution_time": result['execution_time']
|
555
|
+
}
|
556
|
+
else:
|
557
|
+
return result
|
558
|
+
|
559
|
+
except Exception as e:
|
560
|
+
return {"success": False, "error": str(e)}
|
561
|
+
|
562
|
+
async def _convert_audio_format(self, input_audio: str, output_format: str = "mp3",
|
563
|
+
bitrate: str = "192k", sample_rate: int = 44100) -> Dict[str, Any]:
|
564
|
+
"""Convert audio to different format"""
|
565
|
+
try:
|
566
|
+
if not Path(input_audio).exists():
|
567
|
+
return {"success": False, "error": f"Input audio file not found: {input_audio}"}
|
568
|
+
|
569
|
+
output_filename = f"converted_audio_{int(time.time())}.{output_format}"
|
570
|
+
|
571
|
+
audio_codec = self._get_audio_codec(output_format)
|
572
|
+
|
573
|
+
ffmpeg_command = (
|
574
|
+
f"ffmpeg -i ${{input_audio}} "
|
575
|
+
f"-acodec {audio_codec} "
|
576
|
+
f"-ab {bitrate} "
|
577
|
+
f"-ar {sample_rate} "
|
578
|
+
f"${{OUTPUT}}"
|
579
|
+
)
|
580
|
+
|
581
|
+
result = self.media_executor.execute_ffmpeg_command(
|
582
|
+
ffmpeg_command=ffmpeg_command,
|
583
|
+
input_files={'input_audio': input_audio},
|
584
|
+
output_filename=output_filename
|
585
|
+
)
|
586
|
+
|
587
|
+
if result['success']:
|
588
|
+
return {
|
589
|
+
"success": True,
|
590
|
+
"message": f"Audio converted successfully to {output_format}",
|
591
|
+
"output_file": result['output_file'],
|
592
|
+
"input_audio": input_audio,
|
593
|
+
"conversion_settings": {
|
594
|
+
"output_format": output_format,
|
595
|
+
"bitrate": bitrate,
|
596
|
+
"sample_rate": sample_rate,
|
597
|
+
"codec": audio_codec
|
598
|
+
},
|
599
|
+
"execution_time": result['execution_time']
|
600
|
+
}
|
601
|
+
else:
|
602
|
+
return result
|
603
|
+
|
604
|
+
except Exception as e:
|
605
|
+
return {"success": False, "error": str(e)}
|
606
|
+
|
607
|
+
def _get_audio_codec(self, format: str) -> str:
|
608
|
+
"""Get appropriate audio codec for format"""
|
609
|
+
codec_map = {
|
610
|
+
"mp3": "libmp3lame",
|
611
|
+
"aac": "aac",
|
612
|
+
"wav": "pcm_s16le",
|
613
|
+
"flac": "flac",
|
614
|
+
"ogg": "libvorbis",
|
615
|
+
"opus": "libopus"
|
616
|
+
}
|
617
|
+
return codec_map.get(format, "aac")
|
618
|
+
|
619
|
+
async def process_message(self, message: AgentMessage, context: ExecutionContext) -> AgentMessage:
    """Store an incoming message, route it by keyword and build the reply.

    The lower-cased message text is checked against the keyword groups
    below in order; the first group with a keyword contained in the text
    selects the handler, and ``_handle_general_request`` is used when no
    group matches. The reply is stored in memory before it is returned.
    If anything inside the routing/response step raises, an ERROR-type
    response carrying the exception text is returned instead (that
    response is not stored).
    """
    self.memory.store_message(message)

    try:
        lowered = message.content.lower()
        original_text = message.content

        # Order matters: earlier groups win when several keywords match.
        routes = (
            (('extract audio', 'audio from video', 'extract sound'), self._handle_audio_extraction_request),
            (('convert video', 'transcode', 'change format'), self._handle_video_conversion_request),
            (('resize video', 'scale video', 'change size'), self._handle_video_resize_request),
            (('trim', 'cut', 'clip', 'extract clip'), self._handle_media_trim_request),
            (('thumbnail', 'screenshot', 'frame'), self._handle_thumbnail_request),
            (('merge', 'combine', 'join'), self._handle_merge_request),
            (('volume', 'loud', 'quiet', 'audio level'), self._handle_volume_request),
            (('info', 'details', 'properties', 'metadata'), self._handle_info_request),
        )

        handler = self._handle_general_request
        for keywords, candidate in routes:
            if any(keyword in lowered for keyword in keywords):
                handler = candidate
                break

        response_content = await handler(original_text, context)

        response = self.create_response(
            content=response_content,
            recipient_id=message.sender_id,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )

        self.memory.store_message(response)
        return response

    except Exception as e:
        return self.create_response(
            content=f"Media Editor Agent error: {str(e)}",
            recipient_id=message.sender_id,
            message_type=MessageType.ERROR,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
|
666
|
+
|
667
|
+
async def _handle_audio_extraction_request(self, user_message: str, context: ExecutionContext) -> str:
|
668
|
+
"""Handle audio extraction requests"""
|
669
|
+
return ("I can extract audio from video files. Please provide:\n\n"
|
670
|
+
"1. Path to the video file\n"
|
671
|
+
"2. Desired audio format (mp3, wav, aac, flac)\n"
|
672
|
+
"3. Audio quality (high, medium, low)\n\n"
|
673
|
+
"Example: 'Extract audio from /path/to/video.mp4 as high quality mp3'")
|
674
|
+
|
675
|
+
async def _handle_video_conversion_request(self, user_message: str, context: ExecutionContext) -> str:
|
676
|
+
"""Handle video conversion requests"""
|
677
|
+
return ("I can convert videos to different formats. Please specify:\n\n"
|
678
|
+
"1. Input video file path\n"
|
679
|
+
"2. Target format (mp4, avi, mov, mkv, webm)\n"
|
680
|
+
"3. Video codec (h264, h265, vp9)\n"
|
681
|
+
"4. Audio codec (aac, mp3, opus)\n"
|
682
|
+
"5. Quality (CRF value 0-51, lower = better)\n\n"
|
683
|
+
"Example: 'Convert /path/to/video.avi to mp4 with h264 codec'")
|
684
|
+
|
685
|
+
async def _handle_video_resize_request(self, user_message: str, context: ExecutionContext) -> str:
|
686
|
+
"""Handle video resize requests"""
|
687
|
+
return ("I can resize videos to different dimensions. Please provide:\n\n"
|
688
|
+
"1. Input video file path\n"
|
689
|
+
"2. Target dimensions (width x height) or preset (720p, 1080p, 4k)\n"
|
690
|
+
"3. Whether to maintain aspect ratio\n\n"
|
691
|
+
"Example: 'Resize /path/to/video.mp4 to 1280x720' or 'Resize video to 720p'")
|
692
|
+
|
693
|
+
async def _handle_media_trim_request(self, user_message: str, context: ExecutionContext) -> str:
|
694
|
+
"""Handle media trimming requests"""
|
695
|
+
return ("I can trim/cut media files. Please specify:\n\n"
|
696
|
+
"1. Input file path\n"
|
697
|
+
"2. Start time (HH:MM:SS format)\n"
|
698
|
+
"3. Duration or end time\n\n"
|
699
|
+
"Example: 'Trim /path/to/video.mp4 from 00:01:30 for 30 seconds'")
|
700
|
+
|
701
|
+
async def _handle_thumbnail_request(self, user_message: str, context: ExecutionContext) -> str:
|
702
|
+
"""Handle thumbnail creation requests"""
|
703
|
+
return ("I can create thumbnails from videos. Please provide:\n\n"
|
704
|
+
"1. Input video file path\n"
|
705
|
+
"2. Timestamp for thumbnail (HH:MM:SS)\n"
|
706
|
+
"3. Output format (jpg, png, bmp)\n"
|
707
|
+
"4. Thumbnail width (optional)\n\n"
|
708
|
+
"Example: 'Create thumbnail from /path/to/video.mp4 at 00:05:00'")
|
709
|
+
|
710
|
+
async def _handle_merge_request(self, user_message: str, context: ExecutionContext) -> str:
|
711
|
+
"""Handle audio/video merge requests"""
|
712
|
+
return ("I can merge separate audio and video files. Please provide:\n\n"
|
713
|
+
"1. Video file path\n"
|
714
|
+
"2. Audio file path\n"
|
715
|
+
"3. Output format (mp4, mkv, avi)\n\n"
|
716
|
+
"Example: 'Merge /path/to/video.mp4 with /path/to/audio.mp3'")
|
717
|
+
|
718
|
+
async def _handle_volume_request(self, user_message: str, context: ExecutionContext) -> str:
|
719
|
+
"""Handle volume adjustment requests"""
|
720
|
+
return ("I can adjust audio volume. Please specify:\n\n"
|
721
|
+
"1. Input file path (audio or video)\n"
|
722
|
+
"2. Volume change (+10dB, -5dB, 0.5, 2.0)\n"
|
723
|
+
"3. Whether to normalize audio levels\n\n"
|
724
|
+
"Example: 'Increase volume of /path/to/audio.mp3 by +5dB'")
|
725
|
+
|
726
|
+
async def _handle_info_request(self, user_message: str, context: ExecutionContext) -> str:
|
727
|
+
"""Handle media info requests"""
|
728
|
+
return ("I can provide detailed information about media files. Please provide:\n\n"
|
729
|
+
"1. Path to the media file\n\n"
|
730
|
+
"I'll show you format, duration, codecs, resolution, bitrate, and other metadata.\n\n"
|
731
|
+
"Example: 'Get info for /path/to/media.mp4'")
|
732
|
+
|
733
|
+
async def _handle_general_request(self, user_message: str, context: ExecutionContext) -> str:
|
734
|
+
"""Handle general media processing requests"""
|
735
|
+
if self.llm_service:
|
736
|
+
prompt = f"""
|
737
|
+
You are a Media Editor Agent specialized in audio/video processing using FFmpeg.
|
738
|
+
|
739
|
+
Your capabilities include:
|
740
|
+
- Extracting audio from video files
|
741
|
+
- Converting video/audio formats and codecs
|
742
|
+
- Resizing and scaling videos
|
743
|
+
- Trimming/cutting media files
|
744
|
+
- Creating thumbnails and extracting frames
|
745
|
+
- Merging audio and video files
|
746
|
+
- Adjusting audio volume and levels
|
747
|
+
- Getting detailed media file information
|
748
|
+
- Processing various formats (MP4, AVI, MOV, MP3, WAV, etc.)
|
749
|
+
|
750
|
+
User message: {user_message}
|
751
|
+
|
752
|
+
Provide a helpful response about how you can assist with their media processing needs.
|
753
|
+
"""
|
754
|
+
|
755
|
+
response = await self.llm_service.generate_response(prompt, context.metadata)
|
756
|
+
return response
|
757
|
+
else:
|
758
|
+
return ("I'm your Media Editor Agent! I can help you with:\n\n"
|
759
|
+
"🎥 **Video Processing**\n"
|
760
|
+
"- Convert between formats (MP4, AVI, MOV, MKV, WebM)\n"
|
761
|
+
"- Resize and scale videos\n"
|
762
|
+
"- Extract thumbnails and frames\n"
|
763
|
+
"- Trim and cut video clips\n\n"
|
764
|
+
"🎵 **Audio Processing**\n"
|
765
|
+
"- Extract audio from videos\n"
|
766
|
+
"- Convert audio formats (MP3, WAV, AAC, FLAC)\n"
|
767
|
+
"- Adjust volume and normalize levels\n"
|
768
|
+
"- Merge audio with video\n\n"
|
769
|
+
"📊 **Media Analysis**\n"
|
770
|
+
"- Get detailed media information\n"
|
771
|
+
"- Check codecs, resolution, and bitrates\n"
|
772
|
+
"- Analyze file properties\n\n"
|
773
|
+
"⚙️ **Advanced Features**\n"
|
774
|
+
"- Custom FFmpeg processing\n"
|
775
|
+
"- Batch operations\n"
|
776
|
+
"- Quality optimization\n\n"
|
777
|
+
"How can I help you process your media files today?")
|