ambivo-agents 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,992 @@
1
+ # ambivo_agents/agents/media_editor.py
2
+ """
3
+ Media Editor Agent with FFmpeg Integration
4
+ Handles audio/video processing using Docker containers with ffmpeg
5
+ Updated with LLM-aware intent detection and conversation history integration.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import uuid
11
+ import time
12
+ import tempfile
13
+ import shutil
14
+ import os
15
+ from pathlib import Path
16
+ from typing import Dict, List, Any, Optional, Union
17
+ from datetime import datetime, timedelta
18
+
19
+ from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
20
+ from ..config.loader import load_config, get_config_section
21
+ from ..core.history import MediaAgentHistoryMixin, ContextType
22
+ from ..executors.media_executor import MediaDockerExecutor
23
+
24
+
25
+ class MediaEditorAgent(BaseAgent, MediaAgentHistoryMixin):
26
+ """LLM-Aware Media Editor Agent with conversation context and intelligent routing"""
27
+
28
    def __init__(self, agent_id: str = None, memory_manager=None, llm_service=None, **kwargs):
        """Create a media editor agent.

        Args:
            agent_id: Unique agent identifier; auto-generated when None.
            memory_manager: Conversation memory backend, forwarded to BaseAgent.
            llm_service: LLM used for intent analysis; when None the agent
                falls back to keyword-based intent detection.
            **kwargs: Forwarded unchanged to BaseAgent.__init__.
        """
        if agent_id is None:
            # Short random suffix keeps ids human-readable yet unique enough.
            agent_id = f"media_editor_{str(uuid.uuid4())[:8]}"

        super().__init__(
            agent_id=agent_id,
            role=AgentRole.CODE_EXECUTOR,
            memory_manager=memory_manager,
            llm_service=llm_service,
            name="Media Editor Agent",
            description="LLM-aware media processing agent with conversation history",
            **kwargs
        )

        # Initialize history mixin
        self.setup_history_mixin()

        # Load media configuration, then the Docker executor (which reads that
        # config), then register the ffmpeg tools.
        self._load_media_config()
        self._initialize_media_executor()
        self._add_media_tools()
49
+
50
    async def _llm_analyze_media_intent(self, user_message: str, conversation_context: str = "") -> Dict[str, Any]:
        """Use LLM to analyze media processing intent.

        Falls back to keyword matching when no LLM service is configured, and
        again when the LLM call fails or returns something unusable.

        Args:
            user_message: Raw user request text.
            conversation_context: Short summary of prior media-related turns.

        Returns:
            Intent dict with keys: primary_intent, media_files,
            output_preferences, uses_context_reference, context_type,
            technical_specs, confidence.
        """
        if not self.llm_service:
            return self._keyword_based_media_analysis(user_message)

        prompt = f"""
        Analyze this user message in the context of media processing and extract:
        1. Primary intent (extract_audio, convert_video, resize_video, trim_media, create_thumbnail, get_info, help_request)
        2. Media file references (file paths, video/audio files)
        3. Output preferences (format, quality, dimensions, timing)
        4. Context references (referring to previous media operations)
        5. Technical specifications (codecs, bitrates, resolution, etc.)

        Conversation Context:
        {conversation_context}

        Current User Message: {user_message}

        Respond in JSON format:
        {{
            "primary_intent": "extract_audio|convert_video|resize_video|trim_media|create_thumbnail|get_info|help_request",
            "media_files": ["file1.mp4", "video2.avi"],
            "output_preferences": {{
                "format": "mp4|avi|mp3|wav|etc",
                "quality": "high|medium|low",
                "dimensions": "1920x1080|720p|1080p|4k",
                "timing": {{"start": "00:01:30", "duration": "30s"}},
                "codec": "h264|h265|aac|mp3"
            }},
            "uses_context_reference": true/false,
            "context_type": "previous_file|previous_operation",
            "technical_specs": {{
                "video_codec": "codec_name",
                "audio_codec": "codec_name",
                "bitrate": "value",
                "fps": "value"
            }},
            "confidence": 0.0-1.0
        }}
        """

        try:
            response = await self.llm_service.generate_response(prompt)
            import re
            # Greedy brace match with DOTALL grabs the outermost JSON object
            # even when the LLM wraps it in prose or code fences.
            json_match = re.search(r'\{.*\}', response, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
            else:
                # No JSON at all: salvage an intent from the free-text reply.
                return self._extract_media_intent_from_llm_response(response, user_message)
        except Exception as e:
            # LLM failure is non-fatal; degrade to keyword heuristics.
            return self._keyword_based_media_analysis(user_message)
101
+
102
+ def _keyword_based_media_analysis(self, user_message: str) -> Dict[str, Any]:
103
+ """Fallback keyword-based media intent analysis"""
104
+ content_lower = user_message.lower()
105
+
106
+ # Determine intent
107
+ if any(word in content_lower for word in ['extract audio', 'get audio', 'audio from']):
108
+ intent = 'extract_audio'
109
+ elif any(word in content_lower for word in ['convert', 'change format', 'transform']):
110
+ intent = 'convert_video'
111
+ elif any(word in content_lower for word in ['resize', 'scale', 'dimensions']):
112
+ intent = 'resize_video'
113
+ elif any(word in content_lower for word in ['trim', 'cut', 'clip']):
114
+ intent = 'trim_media'
115
+ elif any(word in content_lower for word in ['thumbnail', 'screenshot', 'frame']):
116
+ intent = 'create_thumbnail'
117
+ elif any(word in content_lower for word in ['info', 'information', 'details', 'properties']):
118
+ intent = 'get_info'
119
+ else:
120
+ intent = 'help_request'
121
+
122
+ # Extract media files
123
+ media_files = self.extract_context_from_text(user_message, ContextType.MEDIA_FILE)
124
+ file_paths = self.extract_context_from_text(user_message, ContextType.FILE_PATH)
125
+ all_files = media_files + file_paths
126
+
127
+ # Extract output preferences
128
+ output_format = None
129
+ if 'mp4' in content_lower:
130
+ output_format = 'mp4'
131
+ elif 'mp3' in content_lower:
132
+ output_format = 'mp3'
133
+ elif 'wav' in content_lower:
134
+ output_format = 'wav'
135
+
136
+ quality = 'medium'
137
+ if 'high' in content_lower:
138
+ quality = 'high'
139
+ elif 'low' in content_lower:
140
+ quality = 'low'
141
+
142
+ return {
143
+ "primary_intent": intent,
144
+ "media_files": all_files,
145
+ "output_preferences": {
146
+ "format": output_format,
147
+ "quality": quality,
148
+ "dimensions": None,
149
+ "timing": {},
150
+ "codec": None
151
+ },
152
+ "uses_context_reference": any(word in content_lower for word in ['this', 'that', 'it']),
153
+ "context_type": "previous_file",
154
+ "technical_specs": {},
155
+ "confidence": 0.7
156
+ }
157
+
158
    async def process_message(self, message: AgentMessage, context: ExecutionContext = None) -> AgentMessage:
        """Process message with LLM-based media intent detection and history context.

        Flow: persist incoming message -> update conversation state -> build a
        context summary -> analyze intent (LLM or keyword fallback) -> route to
        the matching handler -> persist and return the response.

        Args:
            message: Incoming user message.
            context: Optional execution context, passed through to routing.

        Returns:
            A response AgentMessage; on any exception, a MessageType.ERROR
            response describing the failure (the exception is not re-raised).
        """
        self.memory.store_message(message)

        try:
            user_message = message.content

            # Update conversation state
            self.update_conversation_state(user_message)

            # Get conversation context for LLM analysis
            conversation_context = self._get_media_conversation_context_summary()

            # Use LLM to analyze intent
            intent_analysis = await self._llm_analyze_media_intent(user_message, conversation_context)

            # Route request based on LLM analysis
            response_content = await self._route_media_with_llm_analysis(intent_analysis, user_message, context)

            response = self.create_response(
                content=response_content,
                recipient_id=message.sender_id,
                session_id=message.session_id,
                conversation_id=message.conversation_id
            )

            self.memory.store_message(response)
            return response

        except Exception as e:
            # NOTE(review): unlike the success path, the error response is not
            # stored in memory — presumably intentional; confirm.
            error_response = self.create_response(
                content=f"Media Editor Agent error: {str(e)}",
                recipient_id=message.sender_id,
                message_type=MessageType.ERROR,
                session_id=message.session_id,
                conversation_id=message.conversation_id
            )
            return error_response
196
+
197
+ def _get_media_conversation_context_summary(self) -> str:
198
+ """Get media conversation context summary"""
199
+ try:
200
+ recent_history = self.get_conversation_history_with_context(
201
+ limit=3,
202
+ context_types=[ContextType.MEDIA_FILE, ContextType.FILE_PATH]
203
+ )
204
+
205
+ context_summary = []
206
+ for msg in recent_history:
207
+ if msg.get('message_type') == 'user_input':
208
+ extracted_context = msg.get('extracted_context', {})
209
+ media_files = extracted_context.get('media_file', [])
210
+ file_paths = extracted_context.get('file_path', [])
211
+
212
+ if media_files:
213
+ context_summary.append(f"Previous media file: {media_files[0]}")
214
+ elif file_paths:
215
+ context_summary.append(f"Previous file: {file_paths[0]}")
216
+
217
+ return "\n".join(context_summary) if context_summary else "No previous media context"
218
+ except:
219
+ return "No previous media context"
220
+
221
+ async def _route_media_with_llm_analysis(self, intent_analysis: Dict[str, Any], user_message: str,
222
+ context: ExecutionContext) -> str:
223
+ """Route media request based on LLM intent analysis"""
224
+
225
+ primary_intent = intent_analysis.get("primary_intent", "help_request")
226
+ media_files = intent_analysis.get("media_files", [])
227
+ output_prefs = intent_analysis.get("output_preferences", {})
228
+ uses_context = intent_analysis.get("uses_context_reference", False)
229
+
230
+ # Resolve context references if needed
231
+ if uses_context and not media_files:
232
+ recent_file = self.get_recent_media_file()
233
+ if recent_file:
234
+ media_files = [recent_file]
235
+
236
+ # Route based on intent
237
+ if primary_intent == "help_request":
238
+ return await self._handle_media_help_request(user_message)
239
+ elif primary_intent == "extract_audio":
240
+ return await self._handle_audio_extraction(media_files, output_prefs, user_message)
241
+ elif primary_intent == "convert_video":
242
+ return await self._handle_video_conversion(media_files, output_prefs, user_message)
243
+ elif primary_intent == "resize_video":
244
+ return await self._handle_video_resize(media_files, output_prefs, user_message)
245
+ elif primary_intent == "trim_media":
246
+ return await self._handle_media_trim(media_files, output_prefs, user_message)
247
+ elif primary_intent == "create_thumbnail":
248
+ return await self._handle_thumbnail_creation(media_files, output_prefs, user_message)
249
+ elif primary_intent == "get_info":
250
+ return await self._handle_media_info(media_files, user_message)
251
+ else:
252
+ return await self._handle_media_help_request(user_message)
253
+
254
+ async def _handle_audio_extraction(self, media_files: List[str], output_prefs: Dict[str, Any],
255
+ user_message: str) -> str:
256
+ """Handle audio extraction with LLM analysis"""
257
+
258
+ if not media_files:
259
+ recent_file = self.get_recent_media_file()
260
+ if recent_file:
261
+ return f"I can extract audio from media files. Did you mean to extract audio from **{recent_file}**? Please confirm."
262
+ else:
263
+ return "I can extract audio from video files. Please provide the video file path.\n\n" \
264
+ "Example: 'Extract audio from video.mp4 as high quality mp3'"
265
+
266
+ input_file = media_files[0]
267
+ output_format = output_prefs.get("format", "mp3")
268
+ quality = output_prefs.get("quality", "medium")
269
+
270
+ try:
271
+ result = await self._extract_audio_from_video(input_file, output_format, quality)
272
+
273
+ if result['success']:
274
+ return f"✅ **Audio Extraction Completed**\n\n" \
275
+ f"📁 **Input:** {input_file}\n" \
276
+ f"🎵 **Output:** {result.get('output_file', 'Unknown')}\n" \
277
+ f"📊 **Format:** {output_format.upper()}\n" \
278
+ f"🎚️ **Quality:** {quality}\n" \
279
+ f"⏱️ **Time:** {result.get('execution_time', 0):.2f}s\n\n" \
280
+ f"Your audio file is ready! 🎉"
281
+ else:
282
+ return f"❌ **Audio extraction failed:** {result.get('error', 'Unknown error')}"
283
+
284
+ except Exception as e:
285
+ return f"❌ **Error during audio extraction:** {str(e)}"
286
+
287
    async def _handle_video_conversion(self, media_files: List[str], output_prefs: Dict[str, Any],
                                       user_message: str) -> str:
        """Handle video conversion with LLM analysis.

        Prompts for a file when none is known; otherwise converts the first
        file using format/codec preferences (defaults: mp4 / h264) and returns
        a user-facing markdown status string.
        """

        if not media_files:
            recent_file = self.get_recent_media_file()
            if recent_file:
                return f"I can convert video files. Did you mean to convert **{recent_file}**? Please specify the target format."
            else:
                return "I can convert video files. Please provide:\n\n" \
                       "1. Video file path\n" \
                       "2. Target format (mp4, avi, mov, mkv, webm)\n\n" \
                       "Example: 'Convert video.avi to mp4'"

        # Only the first referenced file is converted.
        input_file = media_files[0]
        output_format = output_prefs.get("format", "mp4")
        video_codec = output_prefs.get("codec", "h264")

        try:
            result = await self._convert_video_format(input_file, output_format, video_codec)

            if result['success']:
                return f"✅ **Video Conversion Completed**\n\n" \
                       f"📁 **Input:** {input_file}\n" \
                       f"🎬 **Output:** {result.get('output_file', 'Unknown')}\n" \
                       f"📊 **Format:** {output_format.upper()}\n" \
                       f"🔧 **Codec:** {video_codec}\n" \
                       f"⏱️ **Time:** {result.get('execution_time', 0):.2f}s\n\n" \
                       f"Your converted video is ready! 🎉"
            else:
                return f"❌ **Video conversion failed:** {result.get('error', 'Unknown error')}"

        except Exception as e:
            return f"❌ **Error during video conversion:** {str(e)}"
321
+
322
+ async def _handle_video_resize(self, media_files: List[str], output_prefs: Dict[str, Any],
323
+ user_message: str) -> str:
324
+ """Handle video resize with LLM analysis"""
325
+
326
+ if not media_files:
327
+ recent_file = self.get_recent_media_file()
328
+ if recent_file:
329
+ return f"I can resize videos. Did you mean to resize **{recent_file}**? Please specify dimensions."
330
+ else:
331
+ return "I can resize videos. Please provide:\n\n" \
332
+ "1. Video file path\n" \
333
+ "2. Target dimensions (1920x1080, 720p, 1080p, 4k)\n\n" \
334
+ "Example: 'Resize video.mp4 to 720p'"
335
+
336
+ input_file = media_files[0]
337
+ dimensions = output_prefs.get("dimensions")
338
+
339
+ # Parse dimensions
340
+ width, height = self._parse_dimensions(dimensions, user_message)
341
+
342
+ if not width or not height:
343
+ return f"Please specify dimensions for resizing **{input_file}**.\n\n" \
344
+ f"Examples: '720p', '1080p', '1920x1080'"
345
+
346
+ try:
347
+ result = await self._resize_video(input_file, width, height)
348
+
349
+ if result['success']:
350
+ return f"✅ **Video Resize Completed**\n\n" \
351
+ f"📁 **Input:** {input_file}\n" \
352
+ f"🎬 **Output:** {result.get('output_file', 'Unknown')}\n" \
353
+ f"📏 **Dimensions:** {width}x{height}\n" \
354
+ f"⏱️ **Time:** {result.get('execution_time', 0):.2f}s\n\n" \
355
+ f"Your resized video is ready! 🎉"
356
+ else:
357
+ return f"❌ **Video resize failed:** {result.get('error', 'Unknown error')}"
358
+
359
+ except Exception as e:
360
+ return f"❌ **Error during video resize:** {str(e)}"
361
+
362
+ async def _handle_media_trim(self, media_files: List[str], output_prefs: Dict[str, Any], user_message: str) -> str:
363
+ """Handle media trimming with LLM analysis"""
364
+
365
+ if not media_files:
366
+ recent_file = self.get_recent_media_file()
367
+ if recent_file:
368
+ return f"I can trim media files. Did you mean to trim **{recent_file}**? Please specify start time and duration."
369
+ else:
370
+ return "I can trim media files. Please provide:\n\n" \
371
+ "1. Media file path\n" \
372
+ "2. Start time (HH:MM:SS)\n" \
373
+ "3. Duration or end time\n\n" \
374
+ "Example: 'Trim video.mp4 from 00:01:30 for 30 seconds'"
375
+
376
+ input_file = media_files[0]
377
+ timing = output_prefs.get("timing", {})
378
+
379
+ start_time = timing.get("start")
380
+ duration = timing.get("duration")
381
+
382
+ # Parse timing from message if not in preferences
383
+ if not start_time or not duration:
384
+ start_time, duration = self._parse_timing_from_message(user_message)
385
+
386
+ if not start_time:
387
+ return f"Please specify the start time for trimming **{input_file}**.\n\n" \
388
+ f"Example: 'Trim from 00:01:30 for 30 seconds'"
389
+
390
+ if not duration:
391
+ return f"Please specify the duration for trimming **{input_file}** from {start_time}.\n\n" \
392
+ f"Example: 'for 30 seconds' or 'for 2 minutes'"
393
+
394
+ try:
395
+ result = await self._trim_media(input_file, start_time, duration)
396
+
397
+ if result['success']:
398
+ return f"✅ **Media Trim Completed**\n\n" \
399
+ f"📁 **Input:** {input_file}\n" \
400
+ f"🎬 **Output:** {result.get('output_file', 'Unknown')}\n" \
401
+ f"⏱️ **Start:** {start_time}\n" \
402
+ f"⏰ **Duration:** {duration}\n" \
403
+ f"🕐 **Time:** {result.get('execution_time', 0):.2f}s\n\n" \
404
+ f"Your trimmed media is ready! 🎉"
405
+ else:
406
+ return f"❌ **Media trim failed:** {result.get('error', 'Unknown error')}"
407
+
408
+ except Exception as e:
409
+ return f"❌ **Error during media trim:** {str(e)}"
410
+
411
    async def _handle_thumbnail_creation(self, media_files: List[str], output_prefs: Dict[str, Any],
                                         user_message: str) -> str:
        """Handle thumbnail creation with LLM analysis.

        Extracts a single frame at the requested timestamp (default 00:00:05)
        in the requested image format (default jpg) and returns a markdown
        status string.
        """

        if not media_files:
            recent_file = self.get_recent_media_file()
            if recent_file:
                return f"I can create thumbnails from videos. Did you mean to create a thumbnail from **{recent_file}**?"
            else:
                return "I can create thumbnails from videos. Please provide:\n\n" \
                       "1. Video file path\n" \
                       "2. Timestamp (HH:MM:SS) - optional\n\n" \
                       "Example: 'Create thumbnail from video.mp4 at 00:05:00'"

        input_file = media_files[0]
        timing = output_prefs.get("timing", {})
        # Default to 5 seconds in, skipping typical black/intro frames.
        timestamp = timing.get("start", "00:00:05")
        output_format = output_prefs.get("format", "jpg")

        try:
            result = await self._create_video_thumbnail(input_file, timestamp, output_format)

            if result['success']:
                return f"✅ **Thumbnail Created**\n\n" \
                       f"📁 **Video:** {input_file}\n" \
                       f"🖼️ **Thumbnail:** {result.get('output_file', 'Unknown')}\n" \
                       f"⏱️ **Timestamp:** {timestamp}\n" \
                       f"📊 **Format:** {output_format.upper()}\n" \
                       f"🕐 **Time:** {result.get('execution_time', 0):.2f}s\n\n" \
                       f"Your thumbnail is ready! 🎉"
            else:
                return f"❌ **Thumbnail creation failed:** {result.get('error', 'Unknown error')}"

        except Exception as e:
            return f"❌ **Error during thumbnail creation:** {str(e)}"
446
+
447
    async def _handle_media_info(self, media_files: List[str], user_message: str) -> str:
        """Handle media info requests with LLM analysis.

        Runs ffprobe (via _get_media_info) on the first referenced file and
        formats the key properties as a markdown summary.
        """

        if not media_files:
            recent_file = self.get_recent_media_file()
            if recent_file:
                return f"I can provide information about media files. Did you mean to get info for **{recent_file}**?"
            else:
                return "I can provide detailed information about media files.\n\n" \
                       "Please provide the path to a media file."

        input_file = media_files[0]

        try:
            result = await self._get_media_info(input_file)

            if result['success']:
                # Every field defaults to 'Unknown' so a partial probe still
                # renders a complete summary.
                info = result.get('media_info', {})
                return f"📊 **Media Information for {input_file}**\n\n" \
                       f"**📄 File:** {info.get('filename', 'Unknown')}\n" \
                       f"**📦 Format:** {info.get('format', 'Unknown')}\n" \
                       f"**⏱️ Duration:** {info.get('duration', 'Unknown')}\n" \
                       f"**📏 Resolution:** {info.get('resolution', 'Unknown')}\n" \
                       f"**🎬 Video Codec:** {info.get('video_codec', 'Unknown')}\n" \
                       f"**🎵 Audio Codec:** {info.get('audio_codec', 'Unknown')}\n" \
                       f"**📊 File Size:** {info.get('file_size', 'Unknown')}\n\n" \
                       f"🎉 Information retrieval completed!"
            else:
                return f"❌ **Failed to get media info:** {result.get('error', 'Unknown error')}"

        except Exception as e:
            return f"❌ **Error getting media info:** {str(e)}"
479
+
480
    async def _handle_media_help_request(self, user_message: str) -> str:
        """Handle media help requests with conversation context.

        Builds a static capability overview, then appends the current working
        file/state from the history mixin so the help text reflects the
        ongoing conversation.
        """

        state = self.get_conversation_state()

        response = ("I'm your Media Editor Agent! I can help you with:\n\n"
                    "🎥 **Video Processing**\n"
                    "- Extract audio from videos\n"
                    "- Convert between formats (MP4, AVI, MOV, MKV)\n"
                    "- Resize and scale videos\n"
                    "- Create thumbnails and frames\n"
                    "- Trim and cut clips\n\n"
                    "🎵 **Audio Processing**\n"
                    "- Convert audio formats (MP3, WAV, AAC, FLAC)\n"
                    "- Extract from videos\n"
                    "- Adjust quality settings\n\n"
                    "🧠 **Smart Context Features**\n"
                    "- Remembers files from previous messages\n"
                    "- Understands 'that video' and 'this file'\n"
                    "- Maintains working context\n\n")

        # Add current context information
        if state.current_resource:
            response += f"🎯 **Current File:** {state.current_resource}\n"

        if state.working_files:
            response += f"📁 **Working Files:** {len(state.working_files)} files\n"
            for file in state.working_files[-3:]:  # Show last 3
                response += f"  • {file}\n"

        response += "\n💡 **Examples:**\n"
        response += "• 'Extract audio from video.mp4 as MP3'\n"
        response += "• 'Convert that video to MP4'\n"
        response += "• 'Resize it to 720p'\n"
        response += "• 'Create a thumbnail at 2 minutes'\n"
        response += "\nI understand context from our conversation! 🚀"

        return response
518
+
519
+ def _parse_dimensions(self, dimensions: str, user_message: str) -> tuple:
520
+ """Parse dimensions from preferences or message"""
521
+ if dimensions:
522
+ if dimensions == "720p":
523
+ return 1280, 720
524
+ elif dimensions == "1080p":
525
+ return 1920, 1080
526
+ elif dimensions == "4k":
527
+ return 3840, 2160
528
+ elif "x" in dimensions:
529
+ try:
530
+ width, height = dimensions.split("x")
531
+ return int(width), int(height)
532
+ except:
533
+ pass
534
+
535
+ # Parse from user message
536
+ import re
537
+ if '720p' in user_message.lower():
538
+ return 1280, 720
539
+ elif '1080p' in user_message.lower():
540
+ return 1920, 1080
541
+ elif '4k' in user_message.lower():
542
+ return 3840, 2160
543
+ else:
544
+ dimension_match = re.search(r'(\d+)x(\d+)', user_message)
545
+ if dimension_match:
546
+ return int(dimension_match.group(1)), int(dimension_match.group(2))
547
+
548
+ return None, None
549
+
550
+ def _parse_timing_from_message(self, user_message: str) -> tuple:
551
+ """Parse timing information from user message"""
552
+ import re
553
+
554
+ # Look for time patterns
555
+ time_patterns = re.findall(r'\b\d{1,2}:\d{2}:\d{2}\b', user_message)
556
+ duration_patterns = re.findall(r'(\d+)\s*(?:seconds?|secs?|minutes?|mins?)', user_message, re.IGNORECASE)
557
+
558
+ start_time = time_patterns[0] if time_patterns else None
559
+
560
+ duration = None
561
+ if duration_patterns:
562
+ duration_num = duration_patterns[0]
563
+ if 'minute' in user_message.lower() or 'min' in user_message.lower():
564
+ duration = f"00:{duration_num:0>2}:00"
565
+ else:
566
+ duration = f"{int(duration_num)}"
567
+
568
+ return start_time, duration
569
+
570
+ def _extract_media_intent_from_llm_response(self, llm_response: str, user_message: str) -> Dict[str, Any]:
571
+ """Extract media intent from non-JSON LLM response"""
572
+ content_lower = llm_response.lower()
573
+
574
+ if 'extract' in content_lower and 'audio' in content_lower:
575
+ intent = 'extract_audio'
576
+ elif 'convert' in content_lower:
577
+ intent = 'convert_video'
578
+ elif 'resize' in content_lower:
579
+ intent = 'resize_video'
580
+ elif 'trim' in content_lower or 'cut' in content_lower:
581
+ intent = 'trim_media'
582
+ elif 'thumbnail' in content_lower:
583
+ intent = 'create_thumbnail'
584
+ elif 'info' in content_lower:
585
+ intent = 'get_info'
586
+ else:
587
+ intent = 'help_request'
588
+
589
+ return {
590
+ "primary_intent": intent,
591
+ "media_files": [],
592
+ "output_preferences": {"format": None, "quality": "medium"},
593
+ "uses_context_reference": False,
594
+ "context_type": "none",
595
+ "technical_specs": {},
596
+ "confidence": 0.6
597
+ }
598
+
599
+ # Load configuration and initialize
600
+ def _load_media_config(self):
601
+ """Load media configuration"""
602
+ try:
603
+ config = load_config()
604
+ self.media_config = get_config_section('media_editor', config)
605
+ except Exception as e:
606
+ self.media_config = {
607
+ 'docker_image': 'sgosain/amb-ubuntu-python-public-pod',
608
+ 'timeout': 300,
609
+ 'input_dir': './examples/media_input',
610
+ 'output_dir': './examples/media_output'
611
+ }
612
+
613
+ def _initialize_media_executor(self):
614
+ """Initialize media executor"""
615
+ from ..executors.media_executor import MediaDockerExecutor
616
+ self.media_executor = MediaDockerExecutor(self.media_config)
617
+
618
    def _add_media_tools(self):
        """Register the six ffmpeg-backed media tools with the agent.

        Each AgentTool wraps one of the private async processing methods and
        publishes a JSON-schema describing its parameters for tool-calling.
        """

        # Extract audio from video tool
        self.add_tool(AgentTool(
            name="extract_audio_from_video",
            description="Extract audio track from video file",
            function=self._extract_audio_from_video,
            parameters_schema={
                "type": "object",
                "properties": {
                    "input_video": {"type": "string", "description": "Path to input video file"},
                    "output_format": {"type": "string", "enum": ["mp3", "wav", "aac", "flac"], "default": "mp3"},
                    "audio_quality": {"type": "string", "enum": ["high", "medium", "low"], "default": "medium"}
                },
                "required": ["input_video"]
            }
        ))

        # Convert video format tool
        self.add_tool(AgentTool(
            name="convert_video_format",
            description="Convert video to different format/codec",
            function=self._convert_video_format,
            parameters_schema={
                "type": "object",
                "properties": {
                    "input_video": {"type": "string", "description": "Path to input video file"},
                    "output_format": {"type": "string", "enum": ["mp4", "avi", "mov", "mkv", "webm"], "default": "mp4"},
                    "video_codec": {"type": "string", "enum": ["h264", "h265", "vp9", "copy"], "default": "h264"},
                    "audio_codec": {"type": "string", "enum": ["aac", "mp3", "opus", "copy"], "default": "aac"},
                    # CRF 0-51 is the x264/x265 quality range (lower = better).
                    "crf": {"type": "integer", "minimum": 0, "maximum": 51, "default": 23}
                },
                "required": ["input_video"]
            }
        ))

        # Get media information tool
        self.add_tool(AgentTool(
            name="get_media_info",
            description="Get detailed information about media file",
            function=self._get_media_info,
            parameters_schema={
                "type": "object",
                "properties": {
                    "file_path": {"type": "string", "description": "Path to media file"}
                },
                "required": ["file_path"]
            }
        ))

        # Resize video tool
        self.add_tool(AgentTool(
            name="resize_video",
            description="Resize video to specific dimensions",
            function=self._resize_video,
            parameters_schema={
                "type": "object",
                "properties": {
                    "input_video": {"type": "string", "description": "Path to input video file"},
                    "width": {"type": "integer", "description": "Target width in pixels"},
                    "height": {"type": "integer", "description": "Target height in pixels"},
                    "maintain_aspect": {"type": "boolean", "default": True},
                    "preset": {"type": "string", "enum": ["720p", "1080p", "4k", "480p", "custom"], "default": "custom"}
                },
                "required": ["input_video"]
            }
        ))

        # Trim media tool
        self.add_tool(AgentTool(
            name="trim_media",
            description="Trim/cut media file to specific time range",
            function=self._trim_media,
            parameters_schema={
                "type": "object",
                "properties": {
                    "input_file": {"type": "string", "description": "Path to input media file"},
                    "start_time": {"type": "string", "description": "Start time (HH:MM:SS or seconds)"},
                    "duration": {"type": "string", "description": "Duration (HH:MM:SS or seconds)"},
                    "end_time": {"type": "string", "description": "End time (alternative to duration)"}
                },
                "required": ["input_file", "start_time"]
            }
        ))

        # Create video thumbnail tool
        self.add_tool(AgentTool(
            name="create_video_thumbnail",
            description="Extract thumbnail/frame from video",
            function=self._create_video_thumbnail,
            parameters_schema={
                "type": "object",
                "properties": {
                    "input_video": {"type": "string", "description": "Path to input video file"},
                    "timestamp": {"type": "string", "description": "Time to extract frame (HH:MM:SS)",
                                  "default": "00:00:05"},
                    "output_format": {"type": "string", "enum": ["jpg", "png", "bmp"], "default": "jpg"},
                    "width": {"type": "integer", "description": "Thumbnail width", "default": 320}
                },
                "required": ["input_video"]
            }
        ))
721
+
722
+ # Media processing method implementations
723
    async def _extract_audio_from_video(self, input_video: str, output_format: str = "mp3",
                                        audio_quality: str = "medium"):
        """Extract the audio track from a video file via the Docker executor.

        Args:
            input_video: Host path to the source video; must exist.
            output_format: Target audio extension (mp3/wav/aac/flac).
            audio_quality: 'low' | 'medium' | 'high' -> 128k/192k/320k bitrate;
                unknown values fall back to medium.

        Returns:
            Dict with a success flag; on success also message, output_file,
            input_video, execution_time. Never raises — failures come back as
            {"success": False, "error": ...}.
        """
        try:
            if not Path(input_video).exists():
                return {"success": False, "error": f"Input video file not found: {input_video}"}

            # Quality settings
            quality_settings = {
                "low": "-b:a 128k",
                "medium": "-b:a 192k",
                "high": "-b:a 320k"
            }

            # Timestamped name keeps repeated runs from clobbering output.
            output_filename = f"extracted_audio_{int(time.time())}.{output_format}"

            # ${input_video} / ${OUTPUT} (escaped braces in the f-string) are
            # placeholders the media executor substitutes with container-local
            # paths. -vn drops the video stream.
            # NOTE(review): _get_audio_codec is defined elsewhere in this
            # class (outside this excerpt).
            ffmpeg_command = (
                f"ffmpeg -i ${{input_video}} "
                f"{quality_settings.get(audio_quality, quality_settings['medium'])} "
                f"-vn -acodec {self._get_audio_codec(output_format)} "
                f"${{OUTPUT}}"
            )

            result = self.media_executor.execute_ffmpeg_command(
                ffmpeg_command=ffmpeg_command,
                input_files={'input_video': input_video},
                output_filename=output_filename
            )

            if result['success']:
                return {
                    "success": True,
                    "message": f"Audio extracted successfully to {output_format}",
                    "output_file": result['output_file'],
                    "input_video": input_video,
                    "execution_time": result['execution_time']
                }
            else:
                # Propagate the executor's failure dict unchanged.
                return result

        except Exception as e:
            return {"success": False, "error": str(e)}
765
+
766
+ async def _convert_video_format(self, input_video: str, output_format: str = "mp4",
767
+ video_codec: str = "h264", audio_codec: str = "aac", crf: int = 23):
768
+ """Convert video format"""
769
+ try:
770
+ if not Path(input_video).exists():
771
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
772
+
773
+ output_filename = f"converted_video_{int(time.time())}.{output_format}"
774
+
775
+ ffmpeg_command = (
776
+ f"ffmpeg -i ${{input_video}} "
777
+ f"-c:v {video_codec} -c:a {audio_codec} "
778
+ f"-crf {crf} "
779
+ f"${{OUTPUT}}"
780
+ )
781
+
782
+ result = self.media_executor.execute_ffmpeg_command(
783
+ ffmpeg_command=ffmpeg_command,
784
+ input_files={'input_video': input_video},
785
+ output_filename=output_filename
786
+ )
787
+
788
+ if result['success']:
789
+ return {
790
+ "success": True,
791
+ "message": f"Video converted successfully to {output_format}",
792
+ "output_file": result['output_file'],
793
+ "input_video": input_video,
794
+ "execution_time": result['execution_time']
795
+ }
796
+ else:
797
+ return result
798
+
799
+ except Exception as e:
800
+ return {"success": False, "error": str(e)}
801
+
802
    async def _get_media_info(self, file_path: str):
        """Probe a media file with ffprobe and summarize its properties.

        Args:
            file_path: Host path to the media file; must exist.

        Returns:
            {"success": True, "media_info": {...}, "execution_time": ...} on
            success; {"success": False, "error": ...} otherwise. Never raises.
        """
        try:
            if not Path(file_path).exists():
                return {"success": False, "error": f"Media file not found: {file_path}"}

            # ffprobe emits a JSON document describing container and streams.
            # ${input_file} is substituted by the executor.
            ffprobe_command = (
                f"ffprobe -v quiet -print_format json -show_format -show_streams "
                f"${{input_file}}"
            )

            result = self.media_executor.execute_ffmpeg_command(
                ffmpeg_command=ffprobe_command,
                input_files={'input_file': file_path},
                output_filename=None  # No output file for info
            )

            if result['success']:
                # Parse ffprobe output
                try:
                    info_data = json.loads(result.get('output', '{}'))
                    format_info = info_data.get('format', {})
                    streams = info_data.get('streams', [])

                    # First video/audio stream of each kind; {} when absent.
                    video_stream = next((s for s in streams if s.get('codec_type') == 'video'), {})
                    audio_stream = next((s for s in streams if s.get('codec_type') == 'audio'), {})

                    media_info = {
                        'filename': Path(file_path).name,
                        'format': format_info.get('format_name', 'Unknown'),
                        'duration': format_info.get('duration', 'Unknown'),
                        'file_size': format_info.get('size', 'Unknown'),
                        'resolution': f"{video_stream.get('width', 'Unknown')}x{video_stream.get('height', 'Unknown')}" if video_stream else 'N/A',
                        'video_codec': video_stream.get('codec_name', 'N/A'),
                        'audio_codec': audio_stream.get('codec_name', 'N/A')
                    }

                    return {
                        "success": True,
                        "media_info": media_info,
                        "execution_time": result['execution_time']
                    }
                except json.JSONDecodeError:
                    return {"success": False, "error": "Failed to parse media information"}
            else:
                return result

        except Exception as e:
            return {"success": False, "error": str(e)}
851
+
852
+ async def _resize_video(self, input_video: str, width: int = None, height: int = None,
853
+ maintain_aspect: bool = True, preset: str = "custom"):
854
+ """Resize video"""
855
+ try:
856
+ if not Path(input_video).exists():
857
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
858
+
859
+ # Handle presets
860
+ if preset == "720p":
861
+ width, height = 1280, 720
862
+ elif preset == "1080p":
863
+ width, height = 1920, 1080
864
+ elif preset == "4k":
865
+ width, height = 3840, 2160
866
+ elif preset == "480p":
867
+ width, height = 854, 480
868
+
869
+ if not width or not height:
870
+ return {"success": False, "error": "Width and height must be specified"}
871
+
872
+ output_filename = f"resized_video_{int(time.time())}.mp4"
873
+
874
+ scale_filter = f"scale={width}:{height}"
875
+ if maintain_aspect:
876
+ scale_filter = f"scale={width}:{height}:force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2"
877
+
878
+ ffmpeg_command = (
879
+ f"ffmpeg -i ${{input_video}} "
880
+ f"-vf \"{scale_filter}\" "
881
+ f"-c:a copy "
882
+ f"${{OUTPUT}}"
883
+ )
884
+
885
+ result = self.media_executor.execute_ffmpeg_command(
886
+ ffmpeg_command=ffmpeg_command,
887
+ input_files={'input_video': input_video},
888
+ output_filename=output_filename
889
+ )
890
+
891
+ if result['success']:
892
+ return {
893
+ "success": True,
894
+ "message": f"Video resized successfully to {width}x{height}",
895
+ "output_file": result['output_file'],
896
+ "input_video": input_video,
897
+ "execution_time": result['execution_time']
898
+ }
899
+ else:
900
+ return result
901
+
902
+ except Exception as e:
903
+ return {"success": False, "error": str(e)}
904
+
905
+ async def _trim_media(self, input_file: str, start_time: str, duration: str = None, end_time: str = None):
906
+ """Trim media"""
907
+ try:
908
+ if not Path(input_file).exists():
909
+ return {"success": False, "error": f"Input file not found: {input_file}"}
910
+
911
+ output_filename = f"trimmed_media_{int(time.time())}.{Path(input_file).suffix[1:]}"
912
+
913
+ # Build ffmpeg command
914
+ ffmpeg_command = f"ffmpeg -i ${{input_file}} -ss {start_time} "
915
+
916
+ if duration:
917
+ ffmpeg_command += f"-t {duration} "
918
+ elif end_time:
919
+ ffmpeg_command += f"-to {end_time} "
920
+ else:
921
+ return {"success": False, "error": "Either duration or end_time must be specified"}
922
+
923
+ ffmpeg_command += "-c copy ${{OUTPUT}}"
924
+
925
+ result = self.media_executor.execute_ffmpeg_command(
926
+ ffmpeg_command=ffmpeg_command,
927
+ input_files={'input_file': input_file},
928
+ output_filename=output_filename
929
+ )
930
+
931
+ if result['success']:
932
+ return {
933
+ "success": True,
934
+ "message": f"Media trimmed successfully",
935
+ "output_file": result['output_file'],
936
+ "input_file": input_file,
937
+ "execution_time": result['execution_time']
938
+ }
939
+ else:
940
+ return result
941
+
942
+ except Exception as e:
943
+ return {"success": False, "error": str(e)}
944
+
945
+ async def _create_video_thumbnail(self, input_video: str, timestamp: str = "00:00:05",
946
+ output_format: str = "jpg", width: int = 320):
947
+ """Create thumbnail"""
948
+ try:
949
+ if not Path(input_video).exists():
950
+ return {"success": False, "error": f"Input video file not found: {input_video}"}
951
+
952
+ output_filename = f"thumbnail_{int(time.time())}.{output_format}"
953
+
954
+ ffmpeg_command = (
955
+ f"ffmpeg -i ${{input_video}} "
956
+ f"-ss {timestamp} "
957
+ f"-vframes 1 "
958
+ f"-vf scale={width}:-1 "
959
+ f"${{OUTPUT}}"
960
+ )
961
+
962
+ result = self.media_executor.execute_ffmpeg_command(
963
+ ffmpeg_command=ffmpeg_command,
964
+ input_files={'input_video': input_video},
965
+ output_filename=output_filename
966
+ )
967
+
968
+ if result['success']:
969
+ return {
970
+ "success": True,
971
+ "message": f"Thumbnail created successfully",
972
+ "output_file": result['output_file'],
973
+ "input_video": input_video,
974
+ "execution_time": result['execution_time']
975
+ }
976
+ else:
977
+ return result
978
+
979
+ except Exception as e:
980
+ return {"success": False, "error": str(e)}
981
+
982
+ def _get_audio_codec(self, format: str) -> str:
983
+ """Get appropriate audio codec for format"""
984
+ codec_map = {
985
+ "mp3": "libmp3lame",
986
+ "aac": "aac",
987
+ "wav": "pcm_s16le",
988
+ "flac": "flac",
989
+ "ogg": "libvorbis",
990
+ "opus": "libopus"
991
+ }
992
+ return codec_map.get(format, "aac")