ambivo-agents 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,777 @@
1
+ # ambivo_agents/agents/media_editor.py
2
+ """
3
+ Media Editor Agent with FFmpeg Integration
4
+ Handles audio/video processing using Docker containers with ffmpeg
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import uuid
10
+ import time
11
+ import tempfile
12
+ import shutil
13
+ import os
14
+ from pathlib import Path
15
+ from typing import Dict, List, Any, Optional, Union
16
+ from datetime import datetime, timedelta
17
+
18
+ from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
19
+ from ..config.loader import load_config, get_config_section
20
+ from ..executors.media_executor import MediaDockerExecutor
21
+
22
+
23
+ class MediaEditorAgent(BaseAgent):
24
+ """Media Editor Agent for audio/video processing using FFmpeg"""
25
+
26
def __init__(self, agent_id: str | None = None, memory_manager=None, llm_service=None, **kwargs):
    """Create the agent, load its configuration and register its tools.

    Args:
        agent_id: Identifier for this agent. When omitted, an id of the form
            ``media_editor_`` plus the first 8 characters of a UUID4 string
            is generated.
        memory_manager: Forwarded unchanged to ``BaseAgent.__init__``.
        llm_service: Forwarded unchanged to ``BaseAgent.__init__``.
        **kwargs: Extra keyword arguments forwarded to ``BaseAgent.__init__``.

    Raises:
        ValueError: If ``load_config`` or ``get_config_section`` raises. The
            original exception is attached as ``__cause__``.
    """
    if agent_id is None:
        agent_id = f"media_editor_{str(uuid.uuid4())[:8]}"

    super().__init__(
        agent_id=agent_id,
        role=AgentRole.CODE_EXECUTOR,  # Using CODE_EXECUTOR role for media processing
        memory_manager=memory_manager,
        llm_service=llm_service,
        name="Media Editor Agent",
        description="Agent for audio/video processing, transcoding, and editing using FFmpeg",
        **kwargs
    )

    # Load the ``media_editor`` section of the agent configuration.
    try:
        config = load_config()
        self.media_config = get_config_section('media_editor', config)
    except Exception as e:
        # Chain explicitly so the traceback shows the underlying failure as
        # the direct cause rather than as an incidental context exception.
        raise ValueError(f"media_editor configuration not found in agent_config.yaml: {e}") from e

    # The executor receives the whole ``media_editor`` config section.
    self.media_executor = MediaDockerExecutor(self.media_config)

    # Register the tool table last, once the executor exists.
    self._add_media_tools()
52
+
53
def _add_media_tools(self):
    """Register every media processing tool on this agent.

    Each entry of the table below holds, in order: the tool name, its
    description, the method that implements it, the JSON-schema
    ``properties`` of its arguments, and the list of required argument
    names. The tools are passed to ``self.add_tool`` in the order listed.
    """
    tool_specs = [
        # Extract audio from video
        (
            "extract_audio_from_video",
            "Extract audio track from video file",
            self._extract_audio_from_video,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "output_format": {"type": "string", "enum": ["mp3", "wav", "aac", "flac"], "default": "mp3"},
                "audio_quality": {"type": "string", "enum": ["high", "medium", "low"], "default": "medium"},
            },
            ["input_video"],
        ),
        # Convert video format
        (
            "convert_video_format",
            "Convert video to different format/codec",
            self._convert_video_format,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "output_format": {"type": "string", "enum": ["mp4", "avi", "mov", "mkv", "webm"], "default": "mp4"},
                "video_codec": {"type": "string", "enum": ["h264", "h265", "vp9", "copy"], "default": "h264"},
                "audio_codec": {"type": "string", "enum": ["aac", "mp3", "opus", "copy"], "default": "aac"},
                "crf": {"type": "integer", "minimum": 0, "maximum": 51, "default": 23},
            },
            ["input_video"],
        ),
        # Resize video
        (
            "resize_video",
            "Resize video to specific dimensions",
            self._resize_video,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "width": {"type": "integer", "description": "Target width in pixels"},
                "height": {"type": "integer", "description": "Target height in pixels"},
                "maintain_aspect": {"type": "boolean", "default": True},
                "preset": {"type": "string", "enum": ["720p", "1080p", "4k", "480p", "custom"], "default": "custom"},
            },
            ["input_video"],
        ),
        # Get media information
        (
            "get_media_info",
            "Get detailed information about media file",
            self._get_media_info,
            {
                "file_path": {"type": "string", "description": "Path to media file"},
            },
            ["file_path"],
        ),
        # Trim media
        (
            "trim_media",
            "Trim/cut media file to specific time range",
            self._trim_media,
            {
                "input_file": {"type": "string", "description": "Path to input media file"},
                "start_time": {"type": "string", "description": "Start time (HH:MM:SS or seconds)"},
                "duration": {"type": "string", "description": "Duration (HH:MM:SS or seconds)"},
                "end_time": {"type": "string", "description": "End time (alternative to duration)"},
            },
            ["input_file", "start_time"],
        ),
        # Create video thumbnail
        (
            "create_video_thumbnail",
            "Extract thumbnail/frame from video",
            self._create_video_thumbnail,
            {
                "input_video": {"type": "string", "description": "Path to input video file"},
                "timestamp": {"type": "string", "description": "Time to extract frame (HH:MM:SS)",
                              "default": "00:00:05"},
                "output_format": {"type": "string", "enum": ["jpg", "png", "bmp"], "default": "jpg"},
                "width": {"type": "integer", "description": "Thumbnail width", "default": 320},
            },
            ["input_video"],
        ),
        # Merge audio and video
        (
            "merge_audio_video",
            "Combine separate audio and video files",
            self._merge_audio_video,
            {
                "video_file": {"type": "string", "description": "Path to video file"},
                "audio_file": {"type": "string", "description": "Path to audio file"},
                "output_format": {"type": "string", "enum": ["mp4", "mkv", "avi"], "default": "mp4"},
            },
            ["video_file", "audio_file"],
        ),
        # Adjust audio volume
        (
            "adjust_audio_volume",
            "Adjust audio volume/gain",
            self._adjust_audio_volume,
            {
                "input_file": {"type": "string", "description": "Path to audio/video file"},
                "volume_change": {"type": "string", "description": "Volume change (+10dB, -5dB, 0.5, 2.0)"},
                "normalize": {"type": "boolean", "description": "Normalize audio levels", "default": False},
            },
            ["input_file", "volume_change"],
        ),
        # Convert audio format
        (
            "convert_audio_format",
            "Convert audio to different format",
            self._convert_audio_format,
            {
                "input_audio": {"type": "string", "description": "Path to input audio file"},
                "output_format": {"type": "string", "enum": ["mp3", "wav", "aac", "flac", "ogg"], "default": "mp3"},
                "bitrate": {"type": "string", "description": "Audio bitrate (128k, 192k, 320k)", "default": "192k"},
                "sample_rate": {"type": "integer", "description": "Sample rate (44100, 48000)", "default": 44100},
            },
            ["input_audio"],
        ),
    ]

    for tool_name, summary, handler, properties, required in tool_specs:
        self.add_tool(AgentTool(
            name=tool_name,
            description=summary,
            function=handler,
            parameters_schema={
                "type": "object",
                "properties": properties,
                "required": required,
            },
        ))
205
+
206
+ async def _extract_audio_from_video(self, input_video: str, output_format: str = "mp3",
207
+ audio_quality: str = "medium") -> Dict[str, Any]:
208
+ """Extract audio from video file"""
209
+ try:
210
+ if not Path(input_video).exists():
211
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
212
+
213
+ # Quality settings
214
+ quality_settings = {
215
+ "low": "-b:a 128k",
216
+ "medium": "-b:a 192k",
217
+ "high": "-b:a 320k"
218
+ }
219
+
220
+ output_filename = f"extracted_audio_{int(time.time())}.{output_format}"
221
+
222
+ ffmpeg_command = (
223
+ f"ffmpeg -i ${{input_video}} "
224
+ f"{quality_settings.get(audio_quality, quality_settings['medium'])} "
225
+ f"-vn -acodec {self._get_audio_codec(output_format)} "
226
+ f"${{OUTPUT}}"
227
+ )
228
+
229
+ result = self.media_executor.execute_ffmpeg_command(
230
+ ffmpeg_command=ffmpeg_command,
231
+ input_files={'input_video': input_video},
232
+ output_filename=output_filename
233
+ )
234
+
235
+ if result['success']:
236
+ return {
237
+ "success": True,
238
+ "message": f"Audio extracted successfully to {output_format}",
239
+ "output_file": result['output_file'],
240
+ "input_video": input_video,
241
+ "execution_time": result['execution_time']
242
+ }
243
+ else:
244
+ return result
245
+
246
+ except Exception as e:
247
+ return {"success": False, "error": str(e)}
248
+
249
+ async def _convert_video_format(self, input_video: str, output_format: str = "mp4",
250
+ video_codec: str = "h264", audio_codec: str = "aac",
251
+ crf: int = 23) -> Dict[str, Any]:
252
+ """Convert video to different format"""
253
+ try:
254
+ if not Path(input_video).exists():
255
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
256
+
257
+ output_filename = f"converted_video_{int(time.time())}.{output_format}"
258
+
259
+ # Build codec parameters
260
+ video_params = f"-c:v {video_codec}" if video_codec != "copy" else "-c:v copy"
261
+ audio_params = f"-c:a {audio_codec}" if audio_codec != "copy" else "-c:a copy"
262
+
263
+ if video_codec in ["h264", "h265"] and video_codec != "copy":
264
+ video_params += f" -crf {crf}"
265
+
266
+ ffmpeg_command = (
267
+ f"ffmpeg -i ${{input_video}} "
268
+ f"{video_params} {audio_params} "
269
+ f"-preset medium "
270
+ f"${{OUTPUT}}"
271
+ )
272
+
273
+ result = self.media_executor.execute_ffmpeg_command(
274
+ ffmpeg_command=ffmpeg_command,
275
+ input_files={'input_video': input_video},
276
+ output_filename=output_filename
277
+ )
278
+
279
+ if result['success']:
280
+ return {
281
+ "success": True,
282
+ "message": f"Video converted successfully to {output_format}",
283
+ "output_file": result['output_file'],
284
+ "input_video": input_video,
285
+ "conversion_settings": {
286
+ "output_format": output_format,
287
+ "video_codec": video_codec,
288
+ "audio_codec": audio_codec,
289
+ "crf": crf
290
+ },
291
+ "execution_time": result['execution_time']
292
+ }
293
+ else:
294
+ return result
295
+
296
+ except Exception as e:
297
+ return {"success": False, "error": str(e)}
298
+
299
+ async def _resize_video(self, input_video: str, width: int = None, height: int = None,
300
+ maintain_aspect: bool = True, preset: str = "custom") -> Dict[str, Any]:
301
+ """Resize video to specific dimensions"""
302
+ try:
303
+ if not Path(input_video).exists():
304
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
305
+
306
+ # Handle presets
307
+ if preset != "custom":
308
+ preset_dimensions = {
309
+ "480p": (854, 480),
310
+ "720p": (1280, 720),
311
+ "1080p": (1920, 1080),
312
+ "4k": (3840, 2160)
313
+ }
314
+ if preset in preset_dimensions:
315
+ width, height = preset_dimensions[preset]
316
+
317
+ if not width or not height:
318
+ return {"success": False, "error": "Width and height must be specified"}
319
+
320
+ output_filename = f"resized_video_{width}x{height}_{int(time.time())}.mp4"
321
+
322
+ # Scale filter with aspect ratio handling
323
+ if maintain_aspect:
324
+ scale_filter = f"scale={width}:{height}:force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2"
325
+ else:
326
+ scale_filter = f"scale={width}:{height}"
327
+
328
+ ffmpeg_command = (
329
+ f"ffmpeg -i ${{input_video}} "
330
+ f"-vf \"{scale_filter}\" "
331
+ f"-c:a copy "
332
+ f"${{OUTPUT}}"
333
+ )
334
+
335
+ result = self.media_executor.execute_ffmpeg_command(
336
+ ffmpeg_command=ffmpeg_command,
337
+ input_files={'input_video': input_video},
338
+ output_filename=output_filename
339
+ )
340
+
341
+ if result['success']:
342
+ return {
343
+ "success": True,
344
+ "message": f"Video resized successfully to {width}x{height}",
345
+ "output_file": result['output_file'],
346
+ "input_video": input_video,
347
+ "resize_settings": {
348
+ "width": width,
349
+ "height": height,
350
+ "maintain_aspect": maintain_aspect,
351
+ "preset": preset
352
+ },
353
+ "execution_time": result['execution_time']
354
+ }
355
+ else:
356
+ return result
357
+
358
+ except Exception as e:
359
+ return {"success": False, "error": str(e)}
360
+
361
+ async def _get_media_info(self, file_path: str) -> Dict[str, Any]:
362
+ """Get detailed media file information"""
363
+ try:
364
+ result = self.media_executor.get_media_info(file_path)
365
+
366
+ if result['success']:
367
+ return {
368
+ "success": True,
369
+ "message": "Media information retrieved successfully",
370
+ "file_path": file_path,
371
+ "media_info": result.get('media_info', {}),
372
+ "raw_output": result.get('raw_output', '')
373
+ }
374
+ else:
375
+ return result
376
+
377
+ except Exception as e:
378
+ return {"success": False, "error": str(e)}
379
+
380
+ async def _trim_media(self, input_file: str, start_time: str,
381
+ duration: str = None, end_time: str = None) -> Dict[str, Any]:
382
+ """Trim media file to specific time range"""
383
+ try:
384
+ if not Path(input_file).exists():
385
+ return {"success": False, "error": f"Input file not found: {input_file}"}
386
+
387
+ if not duration and not end_time:
388
+ return {"success": False, "error": "Either duration or end_time must be specified"}
389
+
390
+ file_ext = Path(input_file).suffix
391
+ output_filename = f"trimmed_media_{int(time.time())}{file_ext}"
392
+
393
+ # Build time parameters
394
+ time_params = f"-ss {start_time}"
395
+ if duration:
396
+ time_params += f" -t {duration}"
397
+ elif end_time:
398
+ time_params += f" -to {end_time}"
399
+
400
+ ffmpeg_command = (
401
+ f"ffmpeg -i ${{input_file}} "
402
+ f"{time_params} "
403
+ f"-c copy "
404
+ f"${{OUTPUT}}"
405
+ )
406
+
407
+ result = self.media_executor.execute_ffmpeg_command(
408
+ ffmpeg_command=ffmpeg_command,
409
+ input_files={'input_file': input_file},
410
+ output_filename=output_filename
411
+ )
412
+
413
+ if result['success']:
414
+ return {
415
+ "success": True,
416
+ "message": f"Media trimmed successfully",
417
+ "output_file": result['output_file'],
418
+ "input_file": input_file,
419
+ "trim_settings": {
420
+ "start_time": start_time,
421
+ "duration": duration,
422
+ "end_time": end_time
423
+ },
424
+ "execution_time": result['execution_time']
425
+ }
426
+ else:
427
+ return result
428
+
429
+ except Exception as e:
430
+ return {"success": False, "error": str(e)}
431
+
432
+ async def _create_video_thumbnail(self, input_video: str, timestamp: str = "00:00:05",
433
+ output_format: str = "jpg", width: int = 320) -> Dict[str, Any]:
434
+ """Create thumbnail from video"""
435
+ try:
436
+ if not Path(input_video).exists():
437
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
438
+
439
+ output_filename = f"thumbnail_{int(time.time())}.{output_format}"
440
+
441
+ ffmpeg_command = (
442
+ f"ffmpeg -i ${{input_video}} "
443
+ f"-ss {timestamp} "
444
+ f"-vframes 1 "
445
+ f"-vf scale={width}:-1 "
446
+ f"${{OUTPUT}}"
447
+ )
448
+
449
+ result = self.media_executor.execute_ffmpeg_command(
450
+ ffmpeg_command=ffmpeg_command,
451
+ input_files={'input_video': input_video},
452
+ output_filename=output_filename
453
+ )
454
+
455
+ if result['success']:
456
+ return {
457
+ "success": True,
458
+ "message": f"Thumbnail created successfully",
459
+ "output_file": result['output_file'],
460
+ "input_video": input_video,
461
+ "thumbnail_settings": {
462
+ "timestamp": timestamp,
463
+ "output_format": output_format,
464
+ "width": width
465
+ },
466
+ "execution_time": result['execution_time']
467
+ }
468
+ else:
469
+ return result
470
+
471
+ except Exception as e:
472
+ return {"success": False, "error": str(e)}
473
+
474
+ async def _merge_audio_video(self, video_file: str, audio_file: str,
475
+ output_format: str = "mp4") -> Dict[str, Any]:
476
+ """Merge separate audio and video files"""
477
+ try:
478
+ if not Path(video_file).exists():
479
+ return {"success": False, "error": f"Video file not found: {video_file}"}
480
+ if not Path(audio_file).exists():
481
+ return {"success": False, "error": f"Audio file not found: {audio_file}"}
482
+
483
+ output_filename = f"merged_av_{int(time.time())}.{output_format}"
484
+
485
+ ffmpeg_command = (
486
+ f"ffmpeg -i ${{video_file}} -i ${{audio_file}} "
487
+ f"-c:v copy -c:a aac "
488
+ f"-shortest "
489
+ f"${{OUTPUT}}"
490
+ )
491
+
492
+ result = self.media_executor.execute_ffmpeg_command(
493
+ ffmpeg_command=ffmpeg_command,
494
+ input_files={'video_file': video_file, 'audio_file': audio_file},
495
+ output_filename=output_filename
496
+ )
497
+
498
+ if result['success']:
499
+ return {
500
+ "success": True,
501
+ "message": f"Audio and video merged successfully",
502
+ "output_file": result['output_file'],
503
+ "input_files": {
504
+ "video": video_file,
505
+ "audio": audio_file
506
+ },
507
+ "execution_time": result['execution_time']
508
+ }
509
+ else:
510
+ return result
511
+
512
+ except Exception as e:
513
+ return {"success": False, "error": str(e)}
514
+
515
+ async def _adjust_audio_volume(self, input_file: str, volume_change: str,
516
+ normalize: bool = False) -> Dict[str, Any]:
517
+ """Adjust audio volume"""
518
+ try:
519
+ if not Path(input_file).exists():
520
+ return {"success": False, "error": f"Input file not found: {input_file}"}
521
+
522
+ file_ext = Path(input_file).suffix
523
+ output_filename = f"volume_adjusted_{int(time.time())}{file_ext}"
524
+
525
+ # Build audio filter
526
+ if normalize:
527
+ audio_filter = f"loudnorm,volume={volume_change}"
528
+ else:
529
+ audio_filter = f"volume={volume_change}"
530
+
531
+ ffmpeg_command = (
532
+ f"ffmpeg -i ${{input_file}} "
533
+ f"-af \"{audio_filter}\" "
534
+ f"-c:v copy "
535
+ f"${{OUTPUT}}"
536
+ )
537
+
538
+ result = self.media_executor.execute_ffmpeg_command(
539
+ ffmpeg_command=ffmpeg_command,
540
+ input_files={'input_file': input_file},
541
+ output_filename=output_filename
542
+ )
543
+
544
+ if result['success']:
545
+ return {
546
+ "success": True,
547
+ "message": f"Audio volume adjusted successfully",
548
+ "output_file": result['output_file'],
549
+ "input_file": input_file,
550
+ "volume_settings": {
551
+ "volume_change": volume_change,
552
+ "normalize": normalize
553
+ },
554
+ "execution_time": result['execution_time']
555
+ }
556
+ else:
557
+ return result
558
+
559
+ except Exception as e:
560
+ return {"success": False, "error": str(e)}
561
+
562
+ async def _convert_audio_format(self, input_audio: str, output_format: str = "mp3",
563
+ bitrate: str = "192k", sample_rate: int = 44100) -> Dict[str, Any]:
564
+ """Convert audio to different format"""
565
+ try:
566
+ if not Path(input_audio).exists():
567
+ return {"success": False, "error": f"Input audio file not found: {input_audio}"}
568
+
569
+ output_filename = f"converted_audio_{int(time.time())}.{output_format}"
570
+
571
+ audio_codec = self._get_audio_codec(output_format)
572
+
573
+ ffmpeg_command = (
574
+ f"ffmpeg -i ${{input_audio}} "
575
+ f"-acodec {audio_codec} "
576
+ f"-ab {bitrate} "
577
+ f"-ar {sample_rate} "
578
+ f"${{OUTPUT}}"
579
+ )
580
+
581
+ result = self.media_executor.execute_ffmpeg_command(
582
+ ffmpeg_command=ffmpeg_command,
583
+ input_files={'input_audio': input_audio},
584
+ output_filename=output_filename
585
+ )
586
+
587
+ if result['success']:
588
+ return {
589
+ "success": True,
590
+ "message": f"Audio converted successfully to {output_format}",
591
+ "output_file": result['output_file'],
592
+ "input_audio": input_audio,
593
+ "conversion_settings": {
594
+ "output_format": output_format,
595
+ "bitrate": bitrate,
596
+ "sample_rate": sample_rate,
597
+ "codec": audio_codec
598
+ },
599
+ "execution_time": result['execution_time']
600
+ }
601
+ else:
602
+ return result
603
+
604
+ except Exception as e:
605
+ return {"success": False, "error": str(e)}
606
+
607
+ def _get_audio_codec(self, format: str) -> str:
608
+ """Get appropriate audio codec for format"""
609
+ codec_map = {
610
+ "mp3": "libmp3lame",
611
+ "aac": "aac",
612
+ "wav": "pcm_s16le",
613
+ "flac": "flac",
614
+ "ogg": "libvorbis",
615
+ "opus": "libopus"
616
+ }
617
+ return codec_map.get(format, "aac")
618
+
619
async def process_message(self, message: AgentMessage, context: ExecutionContext) -> AgentMessage:
    """Store an incoming message, route it by keyword and return the reply.

    The lower-cased message text is tested against the keyword groups below
    in order, using substring matching; the first group with a hit decides
    the handler. When nothing matches, ``_handle_general_request`` is used.
    The handler receives the original (not lower-cased) text.

    Args:
        message: Incoming message; its ``content``, ``sender_id``,
            ``session_id`` and ``conversation_id`` are read.
        context: Passed through unchanged to the selected handler.

    Returns:
        The response built by ``self.create_response``. On success it is also
        stored in ``self.memory``. If anything raises after the incoming
        message was stored, a ``MessageType.ERROR`` response is returned
        instead (and is not stored).
    """
    self.memory.store_message(message)

    try:
        user_message = message.content
        lowered = user_message.lower()

        # Order matters: earlier groups win over later ones.
        routes = (
            (('extract audio', 'audio from video', 'extract sound'), self._handle_audio_extraction_request),
            (('convert video', 'transcode', 'change format'), self._handle_video_conversion_request),
            (('resize video', 'scale video', 'change size'), self._handle_video_resize_request),
            (('trim', 'cut', 'clip', 'extract clip'), self._handle_media_trim_request),
            (('thumbnail', 'screenshot', 'frame'), self._handle_thumbnail_request),
            (('merge', 'combine', 'join'), self._handle_merge_request),
            (('volume', 'loud', 'quiet', 'audio level'), self._handle_volume_request),
            (('info', 'details', 'properties', 'metadata'), self._handle_info_request),
        )

        handler = self._handle_general_request
        for keywords, candidate in routes:
            if any(keyword in lowered for keyword in keywords):
                handler = candidate
                break

        response_content = await handler(user_message, context)

        reply = self.create_response(
            content=response_content,
            recipient_id=message.sender_id,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
        self.memory.store_message(reply)
        return reply

    except Exception as exc:
        return self.create_response(
            content=f"Media Editor Agent error: {str(exc)}",
            recipient_id=message.sender_id,
            message_type=MessageType.ERROR,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
666
+
667
+ async def _handle_audio_extraction_request(self, user_message: str, context: ExecutionContext) -> str:
668
+ """Handle audio extraction requests"""
669
+ return ("I can extract audio from video files. Please provide:\n\n"
670
+ "1. Path to the video file\n"
671
+ "2. Desired audio format (mp3, wav, aac, flac)\n"
672
+ "3. Audio quality (high, medium, low)\n\n"
673
+ "Example: 'Extract audio from /path/to/video.mp4 as high quality mp3'")
674
+
675
+ async def _handle_video_conversion_request(self, user_message: str, context: ExecutionContext) -> str:
676
+ """Handle video conversion requests"""
677
+ return ("I can convert videos to different formats. Please specify:\n\n"
678
+ "1. Input video file path\n"
679
+ "2. Target format (mp4, avi, mov, mkv, webm)\n"
680
+ "3. Video codec (h264, h265, vp9)\n"
681
+ "4. Audio codec (aac, mp3, opus)\n"
682
+ "5. Quality (CRF value 0-51, lower = better)\n\n"
683
+ "Example: 'Convert /path/to/video.avi to mp4 with h264 codec'")
684
+
685
+ async def _handle_video_resize_request(self, user_message: str, context: ExecutionContext) -> str:
686
+ """Handle video resize requests"""
687
+ return ("I can resize videos to different dimensions. Please provide:\n\n"
688
+ "1. Input video file path\n"
689
+ "2. Target dimensions (width x height) or preset (720p, 1080p, 4k)\n"
690
+ "3. Whether to maintain aspect ratio\n\n"
691
+ "Example: 'Resize /path/to/video.mp4 to 1280x720' or 'Resize video to 720p'")
692
+
693
+ async def _handle_media_trim_request(self, user_message: str, context: ExecutionContext) -> str:
694
+ """Handle media trimming requests"""
695
+ return ("I can trim/cut media files. Please specify:\n\n"
696
+ "1. Input file path\n"
697
+ "2. Start time (HH:MM:SS format)\n"
698
+ "3. Duration or end time\n\n"
699
+ "Example: 'Trim /path/to/video.mp4 from 00:01:30 for 30 seconds'")
700
+
701
+ async def _handle_thumbnail_request(self, user_message: str, context: ExecutionContext) -> str:
702
+ """Handle thumbnail creation requests"""
703
+ return ("I can create thumbnails from videos. Please provide:\n\n"
704
+ "1. Input video file path\n"
705
+ "2. Timestamp for thumbnail (HH:MM:SS)\n"
706
+ "3. Output format (jpg, png, bmp)\n"
707
+ "4. Thumbnail width (optional)\n\n"
708
+ "Example: 'Create thumbnail from /path/to/video.mp4 at 00:05:00'")
709
+
710
+ async def _handle_merge_request(self, user_message: str, context: ExecutionContext) -> str:
711
+ """Handle audio/video merge requests"""
712
+ return ("I can merge separate audio and video files. Please provide:\n\n"
713
+ "1. Video file path\n"
714
+ "2. Audio file path\n"
715
+ "3. Output format (mp4, mkv, avi)\n\n"
716
+ "Example: 'Merge /path/to/video.mp4 with /path/to/audio.mp3'")
717
+
718
+ async def _handle_volume_request(self, user_message: str, context: ExecutionContext) -> str:
719
+ """Handle volume adjustment requests"""
720
+ return ("I can adjust audio volume. Please specify:\n\n"
721
+ "1. Input file path (audio or video)\n"
722
+ "2. Volume change (+10dB, -5dB, 0.5, 2.0)\n"
723
+ "3. Whether to normalize audio levels\n\n"
724
+ "Example: 'Increase volume of /path/to/audio.mp3 by +5dB'")
725
+
726
+ async def _handle_info_request(self, user_message: str, context: ExecutionContext) -> str:
727
+ """Handle media info requests"""
728
+ return ("I can provide detailed information about media files. Please provide:\n\n"
729
+ "1. Path to the media file\n\n"
730
+ "I'll show you format, duration, codecs, resolution, bitrate, and other metadata.\n\n"
731
+ "Example: 'Get info for /path/to/media.mp4'")
732
+
733
+ async def _handle_general_request(self, user_message: str, context: ExecutionContext) -> str:
734
+ """Handle general media processing requests"""
735
+ if self.llm_service:
736
+ prompt = f"""
737
+ You are a Media Editor Agent specialized in audio/video processing using FFmpeg.
738
+
739
+ Your capabilities include:
740
+ - Extracting audio from video files
741
+ - Converting video/audio formats and codecs
742
+ - Resizing and scaling videos
743
+ - Trimming/cutting media files
744
+ - Creating thumbnails and extracting frames
745
+ - Merging audio and video files
746
+ - Adjusting audio volume and levels
747
+ - Getting detailed media file information
748
+ - Processing various formats (MP4, AVI, MOV, MP3, WAV, etc.)
749
+
750
+ User message: {user_message}
751
+
752
+ Provide a helpful response about how you can assist with their media processing needs.
753
+ """
754
+
755
+ response = await self.llm_service.generate_response(prompt, context.metadata)
756
+ return response
757
+ else:
758
+ return ("I'm your Media Editor Agent! I can help you with:\n\n"
759
+ "🎥 **Video Processing**\n"
760
+ "- Convert between formats (MP4, AVI, MOV, MKV, WebM)\n"
761
+ "- Resize and scale videos\n"
762
+ "- Extract thumbnails and frames\n"
763
+ "- Trim and cut video clips\n\n"
764
+ "🎵 **Audio Processing**\n"
765
+ "- Extract audio from videos\n"
766
+ "- Convert audio formats (MP3, WAV, AAC, FLAC)\n"
767
+ "- Adjust volume and normalize levels\n"
768
+ "- Merge audio with video\n\n"
769
+ "📊 **Media Analysis**\n"
770
+ "- Get detailed media information\n"
771
+ "- Check codecs, resolution, and bitrates\n"
772
+ "- Analyze file properties\n\n"
773
+ "⚙️ **Advanced Features**\n"
774
+ "- Custom FFmpeg processing\n"
775
+ "- Batch operations\n"
776
+ "- Quality optimization\n\n"
777
+ "How can I help you process your media files today?")