ambivo-agents 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
1
+ # ambivo_agents/executors/youtube_executor.py
2
+ """
3
+ YouTube Docker executor for downloading videos and audio from YouTube.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import time
9
+ import tempfile
10
+ import shutil
11
+ from pathlib import Path
12
+ from typing import Dict, Any
13
+
14
+ from ..config.loader import load_config, get_config_section
15
+
16
+ try:
17
+ import docker
18
+
19
+ DOCKER_AVAILABLE = True
20
+ except ImportError:
21
+ DOCKER_AVAILABLE = False
22
+
23
+
24
class YouTubeDockerExecutor:
    """Run YouTube downloads and metadata lookups inside a Docker container.

    The executor writes a small Python script (using ``pytubefix``) into a
    temporary directory, mounts that directory at ``/workspace`` in a
    container started from ``self.docker_image`` and reads the results back
    from the container logs and from ``/workspace/output``.

    All caller-supplied values (URL, custom filename) are embedded into the
    generated scripts as properly escaped literals so they can never be
    interpreted as Python or shell code.
    """

    def __init__(self, config: Dict[str, Any] = None):
        """Configure the executor and verify that Docker is reachable.

        Args:
            config: Settings for the executor. When ``None`` the
                ``youtube_download`` section of the YAML configuration is
                loaded; if loading fails an empty dict (all defaults) is used.

        Raises:
            ImportError: If the ``docker`` Python package is not installed.
            ConnectionError: If the Docker daemon cannot be reached.
        """
        # Load from YAML if config not provided
        if config is None:
            try:
                full_config = load_config()
                config = get_config_section('youtube_download', full_config)
            except Exception:
                # Deliberate best-effort: fall back to built-in defaults.
                config = {}

        self.config = config
        self.work_dir = config.get("work_dir", '/opt/ambivo/work_dir')
        self.docker_image = config.get("docker_image", "sgosain/amb-ubuntu-python-public-pod")
        # NOTE(review): the timeout is stored here but is not passed to any
        # container call in this class, so downloads are not time-limited.
        self.timeout = config.get("timeout", 600)  # 10 minutes for downloads
        self.memory_limit = config.get("memory_limit", "1g")

        # YouTube specific directories
        self.download_dir = Path(config.get("download_dir", "./youtube_downloads"))
        self.default_audio_only = config.get("default_audio_only", True)

        # Ensure directories exist; parents=True so a nested download_dir
        # (e.g. "data/media/youtube") does not raise FileNotFoundError.
        self.download_dir.mkdir(parents=True, exist_ok=True)

        if not DOCKER_AVAILABLE:
            raise ImportError("Docker package is required for YouTube downloads")

        try:
            self.docker_client = docker.from_env()
            self.docker_client.ping()
            self.available = True
        except Exception as e:
            raise ConnectionError(
                f"Failed to connect to Docker for YouTube downloads: {e}"
            ) from e

    @staticmethod
    def _find_json_object(text: str, required_key: str = None) -> Dict[str, Any]:
        """Return the first JSON object embedded in *text*.

        The container logs mix free-form text with (possibly multi-line,
        indented) JSON, so every ``{`` is tried as a potential start of an
        object until one decodes successfully.

        Args:
            text: Raw text that may contain a JSON object.
            required_key: When given, only an object containing this key is
                accepted.

        Returns:
            The decoded object, or an empty dict when none is found.
        """
        decoder = json.JSONDecoder()
        position = text.find("{")
        while position != -1:
            try:
                candidate, _ = decoder.raw_decode(text, position)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and (
                required_key is None or required_key in candidate
            ):
                return candidate
            position = text.find("{", position + 1)
        return {}

    @staticmethod
    def _extract_download_result(output: str) -> Dict[str, Any]:
        """Extract the download-result JSON printed by the download script.

        The script prints the result with ``indent=2``, i.e. spread over
        several lines, so the object is located in the whole output rather
        than line by line.

        Returns:
            The decoded result (it contains ``file_path``) or an empty dict.
        """
        return YouTubeDockerExecutor._find_json_object(output, required_key="file_path")

    @staticmethod
    def _create_execution_script(url: str, audio_only: bool) -> str:
        """Build the bash wrapper that runs the download script.

        The URL is shell-quoted so characters such as quotes, backticks or
        ``$(...)`` are echoed literally instead of being executed.
        """
        import shlex  # local import: only this helper needs it

        quoted_url = shlex.quote(str(url))
        return f"""#!/bin/bash
set -e
cd /workspace

echo "Starting YouTube download..."
echo "URL:" {quoted_url}
echo "Audio only: {bool(audio_only)}"

# Execute the download directly (pytubefix should be pre-installed)
python download_youtube.py

echo "YouTube download completed successfully"
ls -la /workspace/output/
"""

    def download_youtube_video(self,
                               url: str,
                               audio_only: bool = None,
                               output_filename: str = None) -> Dict[str, Any]:
        """
        Download video or audio from YouTube URL

        Args:
            url: YouTube URL to download
            audio_only: If True, download only audio. If False, download video.
                ``None`` falls back to ``self.default_audio_only``.
            output_filename: Custom filename (optional)

        Returns:
            A dict with ``success``. On success it also carries ``output``
            (container logs), ``execution_time``, ``url``, ``audio_only``,
            ``download_info`` and ``temp_dir``; on failure it carries
            ``error`` and ``url`` (plus ``execution_time`` when the
            container was started). Errors are reported through the dict,
            not raised.
        """
        if audio_only is None:
            audio_only = self.default_audio_only

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                # Create output directory in temp
                container_output = temp_path / "output"
                container_output.mkdir()

                # Create the YouTube download script. UTF-8 is written
                # explicitly because Python source files default to UTF-8.
                script_file = temp_path / "download_youtube.py"
                script_file.write_text(
                    self._create_download_script(url, audio_only, output_filename),
                    encoding="utf-8",
                )

                # Create execution script (no pip install needed)
                exec_script_file = temp_path / "run_download.sh"
                exec_script_file.write_text(
                    self._create_execution_script(url, audio_only),
                    encoding="utf-8",
                )
                exec_script_file.chmod(0o755)

                # Container configuration for YouTube downloads
                container_config = {
                    'image': self.docker_image,
                    'command': ["bash", "/workspace/run_download.sh"],
                    'volumes': {str(temp_path): {'bind': '/workspace', 'mode': 'rw'}},
                    'working_dir': '/workspace',
                    'mem_limit': self.memory_limit,
                    'network_disabled': False,  # Need network for YouTube downloads
                    'remove': True,
                    'stdout': True,
                    'stderr': True,
                    'environment': {
                        'PYTHONUNBUFFERED': '1',
                        'PYTHONPATH': '/workspace'
                    }
                }

                start_time = time.time()

                try:
                    result = self.docker_client.containers.run(**container_config)
                    execution_time = time.time() - start_time

                    output = result.decode('utf-8') if isinstance(result, bytes) else str(result)

                    # Check if output file was created
                    output_files = list(container_output.glob("*"))
                    output_info = {}

                    if output_files:
                        downloaded_file = output_files[0]  # Take first output file
                        output_info = {
                            'filename': downloaded_file.name,
                            'size_bytes': downloaded_file.stat().st_size,
                            # Location inside the temporary directory, i.e.
                            # where the file was before being moved below.
                            'path': str(downloaded_file)
                        }

                        # Move output file to permanent location
                        permanent_output = self.download_dir / downloaded_file.name
                        shutil.move(str(downloaded_file), str(permanent_output))
                        output_info['final_path'] = str(permanent_output)

                    # Merge the metadata the script reported (title, duration,
                    # ...); without it only the basic file info is returned.
                    output_info.update(self._extract_download_result(output))

                    return {
                        'success': True,
                        'output': output,
                        'execution_time': execution_time,
                        'url': url,
                        'audio_only': audio_only,
                        'download_info': output_info,
                        # The temporary directory is removed when the
                        # enclosing ``with`` block exits, so this path is
                        # informational only.
                        'temp_dir': str(temp_path)
                    }

                except Exception as container_error:
                    return {
                        'success': False,
                        'error': f"Container execution failed: {str(container_error)}",
                        'url': url,
                        'execution_time': time.time() - start_time
                    }

        except Exception as e:
            return {
                'success': False,
                'error': f"YouTube download setup failed: {str(e)}",
                'url': url
            }

    def _create_download_script(self, url: str, audio_only: bool, output_filename: str = None) -> str:
        """Create the Python script for downloading from YouTube

        The URL, the audio flag and the optional filename are inserted with
        ``repr`` so that each becomes a valid, fully escaped Python literal
        in the generated source.

        Args:
            url: YouTube URL the script will download.
            audio_only: Whether the script downloads audio only.
            output_filename: Optional base filename; empty/None means the
                sanitized video title is used.

        Returns:
            The complete source of the script as a string.
        """

        script = f'''#!/usr/bin/env python3
"""
YouTube downloader script using pytubefix
"""

import os
import json
import sys
from pathlib import Path

# Import required modules (should be pre-installed in container)
try:
    from pydantic import BaseModel, Field
    from pytubefix import YouTube
    from pytubefix.cli import on_progress
except ImportError as e:
    print(f"Import error: {{e}}", file=sys.stderr)
    print("Required packages not available in container", file=sys.stderr)
    sys.exit(1)


class DownloadResult(BaseModel):
    file_path: str = Field(..., description="Path where the downloaded file is stored.")
    title: str = Field(..., description="Sanitized title of the YouTube video.")
    url: str = Field(..., description="Original URL of the YouTube video.")
    thumbnail: str = Field(..., description="Thumbnail URL of the YouTube video.")
    duration: int = Field(..., description="Duration of the video in seconds.")
    file_size_bytes: int = Field(..., description="Size of the downloaded file in bytes.")


def sanitize_title(title: str) -> str:
    """Remove special characters from the title."""
    # Remove/replace problematic characters
    title = title.replace('/', '_')
    title = title.replace('\\\\', '_')
    title = title.replace(':', '_')
    title = title.replace('*', '_')
    title = title.replace('?', '_')
    title = title.replace('"', '_')
    title = title.replace('<', '_')
    title = title.replace('>', '_')
    title = title.replace('|', '_')

    # Keep only alphanumeric, spaces, hyphens, underscores
    sanitized = ''.join(c for c in title if c.isalnum() or c in ' -_')

    # Remove extra spaces and limit length
    sanitized = ' '.join(sanitized.split())[:100]

    return sanitized if sanitized else 'youtube_download'


def download_yt(url: str, audio_only: bool = True, output_dir: str = ".", custom_filename: str = None) -> DownloadResult:
    """Download audio or video from a YouTube URL."""
    try:
        # Create YouTube object
        yt = YouTube(url, on_progress_callback=on_progress)

        # Get video info
        title = sanitize_title(yt.title)
        duration = yt.length
        thumbnail_url = yt.thumbnail_url

        # Use custom filename if provided
        filename_base = custom_filename if custom_filename else title
        filename_base = sanitize_title(filename_base)

        if audio_only:
            # Get audio stream
            stream = yt.streams.filter(only_audio=True).first()
            if not stream:
                stream = yt.streams.get_audio_only()
            extension = "mp3"
        else:
            # Get highest resolution video stream
            stream = yt.streams.get_highest_resolution()
            if not stream:
                stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
            extension = "mp4"

        if not stream:
            raise RuntimeError("No downloadable stream found for this video")

        filename = filename_base + "." + extension
        file_path = stream.download(output_path=output_dir, filename=filename)

        # Get file size
        file_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0

        return DownloadResult(
            file_path=file_path,
            title=title,
            url=url,
            thumbnail=thumbnail_url,
            duration=duration,
            file_size_bytes=file_size
        )

    except Exception as e:
        print(f"Error downloading {{url}}: {{e}}", file=sys.stderr)
        raise


if __name__ == '__main__':
    try:
        url = {url!r}
        audio_only = {bool(audio_only)!r}  # This will be True or False, not string
        output_dir = "/workspace/output"
        custom_filename = {(output_filename or None)!r}

        print(f"Downloading from: {{url}}")
        print(f"Audio only: {{audio_only}}")
        print(f"Output directory: {{output_dir}}")

        # Perform download
        result = download_yt(
            url=url,
            audio_only=audio_only,
            output_dir=output_dir,
            custom_filename=custom_filename
        )

        # Output result as JSON for parsing
        print("\\n" + "="*50)
        print("DOWNLOAD RESULT:")
        print(result.model_dump_json(indent=2))
        print("="*50)

    except Exception as e:
        print(f"Download failed: {{e}}", file=sys.stderr)
        sys.exit(1)
'''

        return script

    @staticmethod
    def _create_info_script(url: str) -> str:
        """Build the Python script that prints video metadata as JSON.

        The URL is inserted with ``repr`` so it is a fully escaped literal.
        """
        return f'''#!/usr/bin/env python3
import json
import sys

try:
    from pytubefix import YouTube
except ImportError as e:
    print(f"Error: pytubefix not available: {{e}}", file=sys.stderr)
    sys.exit(1)

try:
    yt = YouTube({url!r})

    info = {{
        "title": yt.title,
        "duration": yt.length,
        "views": yt.views,
        "thumbnail_url": yt.thumbnail_url,
        "description": yt.description[:500] + "..." if len(yt.description) > 500 else yt.description,
        "author": yt.author,
        "publish_date": yt.publish_date.isoformat() if yt.publish_date else None,
        "available_streams": {{
            "audio_streams": len(yt.streams.filter(only_audio=True)),
            "video_streams": len(yt.streams.filter(progressive=True)),
            "highest_resolution": str(yt.streams.get_highest_resolution()),
            "audio_only": str(yt.streams.get_audio_only())
        }}
    }}

    print(json.dumps(info, indent=2))

except Exception as e:
    print(f"Error getting video info: {{e}}", file=sys.stderr)
    sys.exit(1)
'''

    def get_video_info(self, url: str) -> Dict[str, Any]:
        """Get video information without downloading

        Args:
            url: YouTube URL to inspect.

        Returns:
            ``{'success': True, 'video_info': {...}, 'url': url}`` on
            success. On failure ``success`` is False and ``error`` describes
            the problem; ``raw_output`` is included when the container ran
            but its output held no parsable metadata. Errors are reported
            through the dict, not raised.
        """
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                script_file = temp_path / "get_info.py"
                script_file.write_text(self._create_info_script(url), encoding="utf-8")

                container_config = {
                    'image': self.docker_image,
                    'command': ["python", "/workspace/get_info.py"],
                    'volumes': {str(temp_path): {'bind': '/workspace', 'mode': 'rw'}},
                    'working_dir': '/workspace',
                    'mem_limit': self.memory_limit,
                    'network_disabled': False,
                    'remove': True,
                    'stdout': True,
                    'stderr': True
                }

                result = self.docker_client.containers.run(**container_config)
                output = result.decode('utf-8') if isinstance(result, bytes) else str(result)

                # stderr is captured together with stdout, so the JSON may
                # be surrounded by other text; locate the object in it.
                video_info = self._find_json_object(output, required_key="title")
                if video_info:
                    return {
                        'success': True,
                        'video_info': video_info,
                        'url': url
                    }
                return {
                    'success': False,
                    'error': 'Failed to parse video info',
                    'raw_output': output,
                    'url': url
                }

        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'url': url
            }
@@ -0,0 +1,6 @@
1
+ # ambivo_agents/services/__init__.py
2
+ from .factory import AgentFactory
3
+ from .agent_service import AgentService, create_agent_service
4
+
5
+ __all__ = ["AgentFactory", "AgentService", "create_agent_service"]
6
+