ambivo-agents 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,553 @@
1
+ # ambivo_agents/agents/youtube_download.py
2
+ """
3
+ YouTube Download Agent with pytubefix integration
4
+ Handles YouTube video and audio downloads using Docker containers
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import uuid
10
+ import time
11
+ import re
12
+ from pathlib import Path
13
+ from typing import Dict, List, Any, Optional
14
+ from datetime import datetime
15
+ import logging
16
+
17
+ from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
18
+ from ..config.loader import load_config, get_config_section
19
+ from ..executors.youtube_executor import YouTubeDockerExecutor
20
+
21
+
22
+ class YouTubeDownloadAgent(BaseAgent):
23
+ """YouTube Download Agent for downloading videos and audio from YouTube"""
24
+
25
def __init__(self, agent_id: str = None, memory_manager=None, llm_service=None, **kwargs):
    """Build the agent, then load its YouTube config, executor and tools.

    Args:
        agent_id: Identifier for this agent. When ``None`` an id of the form
            ``youtube_<first 8 characters of a uuid4>`` is generated.
        memory_manager: Passed through to ``BaseAgent.__init__``.
        llm_service: Passed through to ``BaseAgent.__init__``.
        **kwargs: Extra keyword arguments passed through to
            ``BaseAgent.__init__``.

    Raises:
        RuntimeError: Propagated from ``_initialize_youtube_executor`` when
            the executor cannot be created.
    """
    # Generate ID if not provided
    if agent_id is None:
        agent_id = f"youtube_{str(uuid.uuid4())[:8]}"

    super().__init__(
        agent_id=agent_id,
        role=AgentRole.CODE_EXECUTOR,
        memory_manager=memory_manager,
        llm_service=llm_service,
        name="YouTube Download Agent",
        description="Agent for downloading videos and audio from YouTube using pytubefix",
        **kwargs
    )

    # YouTube-specific initialization.
    # The configuration is resolved in exactly one place: an inline copy of
    # the same lookup used to run here first, only to be overwritten by the
    # call below, which meant the config source was read twice.
    self._load_youtube_config()
    self._initialize_youtube_executor()
    self._add_youtube_tools()
63
+
64
def _add_youtube_tools(self):
    """Register the five YouTube tools on this agent.

    Each entry below becomes one ``AgentTool`` handed to ``self.add_tool``,
    in the listed order. Every tool takes a JSON-schema ``object`` whose
    ``properties`` and ``required`` lists are given per entry.
    """
    # (tool name, description, bound handler, schema properties, required keys)
    tool_specs = [
        (
            "download_youtube",
            "Download video or audio from YouTube URL",
            self._download_youtube,
            {
                "url": {"type": "string", "description": "YouTube URL to download"},
                "audio_only": {"type": "boolean", "default": True, "description": "Download only audio if True"},
                "custom_filename": {"type": "string", "description": "Custom filename (optional)"}
            },
            ["url"],
        ),
        (
            "get_youtube_info",
            "Get information about a YouTube video without downloading",
            self._get_youtube_info,
            {
                "url": {"type": "string", "description": "YouTube URL to get information about"}
            },
            ["url"],
        ),
        (
            "download_youtube_audio",
            "Download audio only from YouTube URL",
            self._download_youtube_audio,
            {
                "url": {"type": "string", "description": "YouTube URL to download audio from"},
                "custom_filename": {"type": "string", "description": "Custom filename (optional)"}
            },
            ["url"],
        ),
        (
            "download_youtube_video",
            "Download video from YouTube URL",
            self._download_youtube_video,
            {
                "url": {"type": "string", "description": "YouTube URL to download video from"},
                "custom_filename": {"type": "string", "description": "Custom filename (optional)"}
            },
            ["url"],
        ),
        (
            "batch_download_youtube",
            "Download multiple YouTube videos/audio",
            self._batch_download_youtube,
            {
                "urls": {"type": "array", "items": {"type": "string"}, "description": "List of YouTube URLs"},
                "audio_only": {"type": "boolean", "default": True, "description": "Download only audio if True"}
            },
            ["urls"],
        ),
    ]

    for tool_name, summary, handler, properties, required_keys in tool_specs:
        self.add_tool(AgentTool(
            name=tool_name,
            description=summary,
            function=handler,
            parameters_schema={
                "type": "object",
                "properties": properties,
                "required": required_keys
            }
        ))
141
+
142
+ def _load_youtube_config(self):
143
+ """Load YouTube configuration with fallbacks"""
144
+ try:
145
+ if hasattr(self, 'config') and self.config:
146
+ self.youtube_config = self.config.get('youtube_download', {})
147
+ logging.info("Loaded YouTube config from agent config")
148
+ else:
149
+ config = load_config()
150
+ self.youtube_config = config.get('youtube_download', {})
151
+ logging.info("Loaded YouTube config from file")
152
+ except Exception as e:
153
+ # Provide sensible defaults if config fails
154
+ self.youtube_config = {
155
+ 'docker_image': 'sgosain/amb-ubuntu-python-public-pod',
156
+ 'download_dir': './youtube_downloads',
157
+ 'timeout': 600,
158
+ 'memory_limit': '1g',
159
+ 'default_audio_only': True
160
+ }
161
+ #logging.warning(f"Using default YouTube config due to: {e}")
162
+
163
+ def _initialize_youtube_executor(self):
164
+ """Initialize the YouTube executor"""
165
+ try:
166
+ from ..executors.youtube_executor import YouTubeDockerExecutor
167
+ self.youtube_executor = YouTubeDockerExecutor(self.youtube_config)
168
+ logging.info("YouTube executor initialized successfully")
169
+ except Exception as e:
170
+ logging.error(f"Failed to initialize YouTube executor: {e}")
171
+ raise RuntimeError(f"Failed to initialize YouTube executor: {e}")
172
+
173
@classmethod
def create_simple(cls, agent_id: str = None, **kwargs):
    """
    Build an agent with auto-configuration switched on.

    Args:
        agent_id: Optional agent ID. When None, one is generated as
            ``youtube_`` followed by the first 8 characters of a uuid4.
        **kwargs: Additional arguments passed to the constructor.

    Returns:
        The instance produced by calling ``cls`` with ``auto_configure=True``.
    """
    resolved_id = agent_id if agent_id is not None else f"youtube_{str(uuid.uuid4())[:8]}"
    return cls(agent_id=resolved_id, auto_configure=True, **kwargs)
195
+
196
@classmethod
def create_advanced(cls,
                    agent_id: str,
                    memory_manager,
                    llm_service=None,
                    config: Dict[str, Any] = None,
                    **kwargs):
    """
    Build an agent from explicitly supplied dependencies.

    Args:
        agent_id: Agent identifier.
        memory_manager: Pre-configured memory manager.
        llm_service: Optional pre-configured LLM service.
        config: Optional configuration dictionary.
        **kwargs: Additional arguments passed to the constructor.

    Returns:
        The instance produced by calling ``cls`` with the given dependencies
        and ``auto_configure=False``.
    """
    explicit_dependencies = {
        "agent_id": agent_id,
        "memory_manager": memory_manager,
        "llm_service": llm_service,
        "config": config,
        # Auto-configuration is turned off when dependencies are given by hand.
        "auto_configure": False,
    }
    return cls(**explicit_dependencies, **kwargs)
224
+
225
+ async def _download_youtube(self, url: str, audio_only: bool = True, custom_filename: str = None) -> Dict[str, Any]:
226
+ """Download video or audio from YouTube"""
227
+ try:
228
+ if not self._is_valid_youtube_url(url):
229
+ return {"success": False, "error": f"Invalid YouTube URL: {url}"}
230
+
231
+ result = self.youtube_executor.download_youtube_video(
232
+ url=url,
233
+ audio_only=audio_only,
234
+ output_filename=custom_filename
235
+ )
236
+
237
+ if result['success']:
238
+ download_info = result.get('download_info', {})
239
+ return {
240
+ "success": True,
241
+ "message": f"Successfully downloaded {'audio' if audio_only else 'video'} from YouTube",
242
+ "url": url,
243
+ "audio_only": audio_only,
244
+ "file_path": download_info.get('final_path'),
245
+ "filename": download_info.get('filename'),
246
+ "file_size_bytes": download_info.get('size_bytes', 0),
247
+ "execution_time": result['execution_time'],
248
+ "custom_filename": custom_filename
249
+ }
250
+ else:
251
+ return result
252
+
253
+ except Exception as e:
254
+ return {"success": False, "error": str(e)}
255
+
256
+ async def _download_youtube_audio(self, url: str, custom_filename: str = None) -> Dict[str, Any]:
257
+ """Download audio only from YouTube"""
258
+ return await self._download_youtube(url, audio_only=True, custom_filename=custom_filename)
259
+
260
+ async def _download_youtube_video(self, url: str, custom_filename: str = None) -> Dict[str, Any]:
261
+ """Download video from YouTube"""
262
+ return await self._download_youtube(url, audio_only=False, custom_filename=custom_filename)
263
+
264
+ async def _get_youtube_info(self, url: str) -> Dict[str, Any]:
265
+ """Get YouTube video information"""
266
+ try:
267
+ if not self._is_valid_youtube_url(url):
268
+ return {"success": False, "error": f"Invalid YouTube URL: {url}"}
269
+
270
+ result = self.youtube_executor.get_video_info(url)
271
+
272
+ if result['success']:
273
+ return {
274
+ "success": True,
275
+ "message": "Successfully retrieved video information",
276
+ "url": url,
277
+ "video_info": result['video_info']
278
+ }
279
+ else:
280
+ return result
281
+
282
+ except Exception as e:
283
+ return {"success": False, "error": str(e)}
284
+
285
+ async def _batch_download_youtube(self, urls: List[str], audio_only: bool = True) -> Dict[str, Any]:
286
+ """Download multiple YouTube videos/audio"""
287
+ try:
288
+ results = []
289
+ successful = 0
290
+ failed = 0
291
+
292
+ for i, url in enumerate(urls):
293
+ try:
294
+ result = await self._download_youtube(url, audio_only=audio_only)
295
+ results.append(result)
296
+
297
+ if result.get('success', False):
298
+ successful += 1
299
+ else:
300
+ failed += 1
301
+
302
+ # Add delay between downloads to be respectful
303
+ if i < len(urls) - 1:
304
+ await asyncio.sleep(2)
305
+
306
+ except Exception as e:
307
+ results.append({
308
+ "success": False,
309
+ "url": url,
310
+ "error": str(e)
311
+ })
312
+ failed += 1
313
+
314
+ return {
315
+ "success": True,
316
+ "message": f"Batch download completed: {successful} successful, {failed} failed",
317
+ "total_urls": len(urls),
318
+ "successful": successful,
319
+ "failed": failed,
320
+ "audio_only": audio_only,
321
+ "results": results
322
+ }
323
+
324
+ except Exception as e:
325
+ return {"success": False, "error": str(e)}
326
+
327
+ def _is_valid_youtube_url(self, url: str) -> bool:
328
+ """Check if URL is a valid YouTube URL"""
329
+ youtube_patterns = [
330
+ r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/',
331
+ r'(https?://)?(www\.)?youtu\.be/',
332
+ r'(https?://)?(www\.)?youtube\.com/watch\?v=',
333
+ r'(https?://)?(www\.)?youtube\.com/embed/',
334
+ r'(https?://)?(www\.)?youtube\.com/v/',
335
+ ]
336
+
337
+ return any(re.match(pattern, url, re.IGNORECASE) for pattern in youtube_patterns)
338
+
339
+ def _extract_youtube_urls(self, text: str) -> List[str]:
340
+ """Extract YouTube URLs from text"""
341
+ youtube_patterns = [
342
+ r'https?://(?:www\.)?youtube\.com/watch\?v=[\w-]+',
343
+ r'https?://(?:www\.)?youtu\.be/[\w-]+',
344
+ r'https?://(?:www\.)?youtube\.com/embed/[\w-]+',
345
+ r'https?://(?:www\.)?youtube\.com/v/[\w-]+',
346
+ ]
347
+
348
+ urls = []
349
+ for pattern in youtube_patterns:
350
+ urls.extend(re.findall(pattern, text, re.IGNORECASE))
351
+
352
+ return list(set(urls)) # Remove duplicates
353
+
354
async def process_message(self, message: AgentMessage, context: ExecutionContext) -> AgentMessage:
    """Process an incoming message and route it to a YouTube operation.

    The message is stored in memory, YouTube URLs are extracted from its
    content, and the request goes to the single-download handler (one URL),
    the batch handler (several URLs) or the general help handler (none).

    Audio is the default. Video is chosen only when the text around the
    links contains a video keyword and no audio keyword. The links
    themselves are excluded from keyword matching.

    Returns:
        The response built with ``create_response``. On any exception, an
        error response with ``MessageType.ERROR`` is returned instead.
    """
    self.memory.store_message(message)

    try:
        user_message = message.content

        # Extract YouTube URLs from message
        youtube_urls = self._extract_youtube_urls(user_message)

        if youtube_urls:
            # Match keywords against the words around the links only. A
            # standard link contains "watch?v=", so scanning the raw
            # message made the video keyword "watch" match every such URL.
            intent_text = re.sub(r'https?://\S+', ' ', user_message).lower()

            audio_keywords = ['audio', 'mp3', 'music', 'sound', 'song']
            video_keywords = ['video', 'mp4', 'watch', 'visual']

            wants_audio = any(keyword in intent_text for keyword in audio_keywords)
            wants_video = any(keyword in intent_text for keyword in video_keywords)

            # Default to audio unless video is explicitly requested. An
            # explicit audio request wins when both kinds of keyword appear
            # ("download the audio from this video").
            audio_only = wants_audio or not wants_video

            if len(youtube_urls) == 1:
                # Single URL download
                response_content = await self._handle_single_download(youtube_urls[0], audio_only, user_message,
                                                                      context)
            else:
                # Multiple URL download
                response_content = await self._handle_batch_download(youtube_urls, audio_only, user_message,
                                                                     context)
        else:
            # No URLs found, provide help
            response_content = await self._handle_general_request(user_message, context)

        response = self.create_response(
            content=response_content,
            recipient_id=message.sender_id,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )

        self.memory.store_message(response)
        return response

    except Exception as e:
        error_response = self.create_response(
            content=f"YouTube Download Agent error: {str(e)}",
            recipient_id=message.sender_id,
            message_type=MessageType.ERROR,
            session_id=message.session_id,
            conversation_id=message.conversation_id
        )
        return error_response
407
+
408
+ async def _handle_single_download(self, url: str, audio_only: bool, user_message: str,
409
+ context: ExecutionContext) -> str:
410
+ """Handle single YouTube URL download"""
411
+ try:
412
+ # Check if user wants info only
413
+ if any(keyword in user_message.lower() for keyword in ['info', 'information', 'details', 'about']):
414
+ result = await self._get_youtube_info(url)
415
+
416
+ if result['success']:
417
+ video_info = result['video_info']
418
+ return f"""šŸ“¹ **YouTube Video Information**
419
+
420
+ **šŸŽ¬ Title:** {video_info.get('title', 'Unknown')}
421
+ **šŸ‘¤ Author:** {video_info.get('author', 'Unknown')}
422
+ **ā±ļø Duration:** {self._format_duration(video_info.get('duration', 0))}
423
+ **šŸ‘€ Views:** {video_info.get('views', 0):,}
424
+ **šŸ”— URL:** {url}
425
+
426
+ **šŸ“Š Available Streams:**
427
+ - Audio streams: {video_info.get('available_streams', {}).get('audio_streams', 0)}
428
+ - Video streams: {video_info.get('available_streams', {}).get('video_streams', 0)}
429
+ - Highest resolution: {video_info.get('available_streams', {}).get('highest_resolution', 'Unknown')}
430
+
431
+ Would you like me to download this video?"""
432
+ else:
433
+ return f"āŒ **Error getting video info:** {result['error']}"
434
+
435
+ # Proceed with download
436
+ result = await self._download_youtube(url, audio_only=audio_only)
437
+
438
+ if result['success']:
439
+ file_size_mb = result.get('file_size_bytes', 0) / (1024 * 1024)
440
+ return f"""āœ… **YouTube Download Completed**
441
+
442
+ **šŸŽÆ Type:** {'Audio' if audio_only else 'Video'}
443
+ **šŸ”— URL:** {url}
444
+ **šŸ“ File:** {result.get('filename', 'Unknown')}
445
+ **šŸ“ Location:** {result.get('file_path', 'Unknown')}
446
+ **šŸ“Š Size:** {file_size_mb:.2f} MB
447
+ **ā±ļø Download Time:** {result.get('execution_time', 0):.2f}s
448
+
449
+ Your {'audio' if audio_only else 'video'} has been successfully downloaded and is ready to use! šŸŽ‰"""
450
+ else:
451
+ return f"āŒ **Download Failed:** {result['error']}"
452
+
453
+ except Exception as e:
454
+ return f"āŒ **Error processing download:** {str(e)}"
455
+
456
+ async def _handle_batch_download(self, urls: List[str], audio_only: bool, user_message: str,
457
+ context: ExecutionContext) -> str:
458
+ """Handle batch YouTube URL downloads"""
459
+ try:
460
+ result = await self._batch_download_youtube(urls, audio_only=audio_only)
461
+
462
+ if result['success']:
463
+ successful = result['successful']
464
+ failed = result['failed']
465
+ total = result['total_urls']
466
+
467
+ response = f"""šŸ“¦ **Batch YouTube Download Completed**
468
+
469
+ **šŸ“Š Summary:**
470
+ - **Total URLs:** {total}
471
+ - **Successful:** {successful}
472
+ - **Failed:** {failed}
473
+ - **Type:** {'Audio' if audio_only else 'Video'}
474
+
475
+ """
476
+
477
+ if successful > 0:
478
+ response += "āœ… **Successfully Downloaded:**\n"
479
+ for i, download_result in enumerate(result['results'], 1):
480
+ if download_result.get('success', False):
481
+ response += f"{i}. {download_result.get('filename', 'Unknown')}\n"
482
+
483
+ if failed > 0:
484
+ response += f"\nāŒ **Failed Downloads:** {failed}\n"
485
+ for i, download_result in enumerate(result['results'], 1):
486
+ if not download_result.get('success', False):
487
+ response += f"{i}. {download_result.get('url', 'Unknown')}: {download_result.get('error', 'Unknown error')}\n"
488
+
489
+ response += f"\nšŸŽ‰ Batch download completed with {successful}/{total} successful downloads!"
490
+ return response
491
+ else:
492
+ return f"āŒ **Batch download failed:** {result['error']}"
493
+
494
+ except Exception as e:
495
+ return f"āŒ **Error processing batch download:** {str(e)}"
496
+
497
+ async def _handle_general_request(self, user_message: str, context: ExecutionContext) -> str:
498
+ """Handle general YouTube download requests"""
499
+ if self.llm_service:
500
+ prompt = f"""
501
+ You are a YouTube Download Agent specialized in downloading videos and audio from YouTube.
502
+
503
+ Your capabilities include:
504
+ - Downloading audio (MP3) from YouTube videos
505
+ - Downloading videos (MP4) from YouTube
506
+ - Getting video information without downloading
507
+ - Batch downloading multiple URLs
508
+ - Custom filename support
509
+
510
+ User message: {user_message}
511
+
512
+ Provide a helpful response about how you can assist with their YouTube download needs.
513
+ """
514
+
515
+ response = await self.llm_service.generate_response(prompt, context.metadata)
516
+ return response
517
+ else:
518
+ return ("I'm your YouTube Download Agent! I can help you with:\n\n"
519
+ "šŸŽµ **Audio Downloads**\n"
520
+ "- Download MP3 audio from YouTube videos\n"
521
+ "- High-quality audio extraction\n"
522
+ "- Custom filename support\n\n"
523
+ "šŸŽ„ **Video Downloads**\n"
524
+ "- Download MP4 videos in highest available quality\n"
525
+ "- Progressive download format\n"
526
+ "- Full video with audio\n\n"
527
+ "šŸ“Š **Video Information**\n"
528
+ "- Get video details without downloading\n"
529
+ "- Check duration, views, and available streams\n"
530
+ "- Thumbnail and metadata extraction\n\n"
531
+ "šŸ“¦ **Batch Operations**\n"
532
+ "- Download multiple videos at once\n"
533
+ "- Bulk audio/video processing\n\n"
534
+ "**šŸ“ Usage Examples:**\n"
535
+ "- 'Download audio from https://youtube.com/watch?v=example'\n"
536
+ "- 'Download video from https://youtube.com/watch?v=example'\n"
537
+ "- 'Get info about https://youtube.com/watch?v=example'\n"
538
+ "- 'Download https://youtube.com/watch?v=1 and https://youtube.com/watch?v=2'\n\n"
539
+ "Just paste any YouTube URL and I'll handle the download for you! šŸš€")
540
+
541
+ def _format_duration(self, seconds: int) -> str:
542
+ """Format duration in seconds to readable format"""
543
+ if seconds < 60:
544
+ return f"{seconds}s"
545
+ elif seconds < 3600:
546
+ minutes = seconds // 60
547
+ remaining_seconds = seconds % 60
548
+ return f"{minutes}m {remaining_seconds}s"
549
+ else:
550
+ hours = seconds // 3600
551
+ remaining_minutes = (seconds % 3600) // 60
552
+ remaining_seconds = seconds % 60
553
+ return f"{hours}h {remaining_minutes}m {remaining_seconds}s"