ambivo-agents 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ambivo_agents/__init__.py +91 -0
- ambivo_agents/agents/__init__.py +21 -0
- ambivo_agents/agents/assistant.py +203 -0
- ambivo_agents/agents/code_executor.py +133 -0
- ambivo_agents/agents/code_executor2.py +222 -0
- ambivo_agents/agents/knowledge_base.py +935 -0
- ambivo_agents/agents/media_editor.py +992 -0
- ambivo_agents/agents/moderator.py +617 -0
- ambivo_agents/agents/simple_web_search.py +404 -0
- ambivo_agents/agents/web_scraper.py +1027 -0
- ambivo_agents/agents/web_search.py +933 -0
- ambivo_agents/agents/youtube_download.py +784 -0
- ambivo_agents/cli.py +699 -0
- ambivo_agents/config/__init__.py +4 -0
- ambivo_agents/config/loader.py +301 -0
- ambivo_agents/core/__init__.py +33 -0
- ambivo_agents/core/base.py +1024 -0
- ambivo_agents/core/history.py +606 -0
- ambivo_agents/core/llm.py +333 -0
- ambivo_agents/core/memory.py +640 -0
- ambivo_agents/executors/__init__.py +8 -0
- ambivo_agents/executors/docker_executor.py +108 -0
- ambivo_agents/executors/media_executor.py +237 -0
- ambivo_agents/executors/youtube_executor.py +404 -0
- ambivo_agents/services/__init__.py +6 -0
- ambivo_agents/services/agent_service.py +605 -0
- ambivo_agents/services/factory.py +370 -0
- ambivo_agents-1.0.1.dist-info/METADATA +1090 -0
- ambivo_agents-1.0.1.dist-info/RECORD +33 -0
- ambivo_agents-1.0.1.dist-info/WHEEL +5 -0
- ambivo_agents-1.0.1.dist-info/entry_points.txt +3 -0
- ambivo_agents-1.0.1.dist-info/licenses/LICENSE +21 -0
- ambivo_agents-1.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,404 @@
|
|
1
|
+
# ambivo_agents/executors/youtube_executor.py
|
2
|
+
"""
|
3
|
+
YouTube Docker executor for downloading videos and audio from YouTube.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import asyncio
|
7
|
+
import json
|
8
|
+
import time
|
9
|
+
import tempfile
|
10
|
+
import shutil
|
11
|
+
from pathlib import Path
|
12
|
+
from typing import Dict, Any
|
13
|
+
|
14
|
+
from ..config.loader import load_config, get_config_section
|
15
|
+
|
16
|
+
try:
|
17
|
+
import docker
|
18
|
+
|
19
|
+
DOCKER_AVAILABLE = True
|
20
|
+
except ImportError:
|
21
|
+
DOCKER_AVAILABLE = False
|
22
|
+
|
23
|
+
|
24
|
+
class YouTubeDockerExecutor:
    """Docker-based executor that downloads YouTube video/audio with pytubefix.

    Each operation writes a small generated Python script into a temporary
    directory, mounts that directory at ``/workspace`` inside a throwaway
    container, runs the script there, and collects its output.

    Caller-supplied values (URL, filename) are embedded into the generated
    scripts as quoted literals only, never as raw text, so they cannot inject
    code into the container.
    """

    def __init__(self, config: Dict[str, Any] = None):
        """Read settings and connect to the Docker daemon.

        Args:
            config: Settings for this executor. When ``None``, the
                ``youtube_download`` section of the YAML configuration is
                loaded; if loading fails, built-in defaults are used.

        Raises:
            ImportError: The ``docker`` Python package is not installed.
            ConnectionError: The Docker daemon could not be reached.
        """
        # Load from YAML if config not provided
        if config is None:
            try:
                full_config = load_config()
                config = get_config_section('youtube_download', full_config)
            except Exception:
                # Deliberate best-effort: fall back to the defaults below.
                config = {}

        self.config = config
        self.work_dir = config.get("work_dir", '/opt/ambivo/work_dir')
        self.docker_image = config.get("docker_image", "sgosain/amb-ubuntu-python-public-pod")
        self.timeout = config.get("timeout", 600)  # 10 minutes for downloads
        self.memory_limit = config.get("memory_limit", "1g")

        # YouTube specific directories
        self.download_dir = Path(config.get("download_dir", "./youtube_downloads"))
        self.default_audio_only = config.get("default_audio_only", True)

        # Ensure directories exist (parents=True so nested paths also work)
        self.download_dir.mkdir(parents=True, exist_ok=True)

        if not DOCKER_AVAILABLE:
            raise ImportError("Docker package is required for YouTube downloads")

        try:
            self.docker_client = docker.from_env()
            self.docker_client.ping()
            self.available = True
        except Exception as e:
            raise ConnectionError(
                f"Failed to connect to Docker for YouTube downloads: {e}"
            ) from e

    def _run_container(self, workspace: Path, command, environment: Dict[str, str] = None) -> str:
        """Run ``command`` in a throwaway container and return its logs.

        The container is started detached so that ``self.timeout`` can be
        enforced while waiting for it, and it is always removed afterwards.

        Args:
            workspace: Host directory mounted read-write at ``/workspace``.
            command: Command (list of arguments) to execute in the container.
            environment: Optional environment variables for the container.

        Returns:
            Combined stdout/stderr of the container, decoded as UTF-8.

        Raises:
            RuntimeError: The container exited with a non-zero status.
            Exception: Errors raised by the Docker SDK (including the wait
                timeout) propagate unchanged.
        """
        run_kwargs = {
            'image': self.docker_image,
            'command': command,
            'volumes': {str(workspace): {'bind': '/workspace', 'mode': 'rw'}},
            'working_dir': '/workspace',
            'mem_limit': self.memory_limit,
            'network_disabled': False,  # Need network for YouTube downloads
            'detach': True,
        }
        if environment:
            run_kwargs['environment'] = environment

        container = self.docker_client.containers.run(**run_kwargs)
        try:
            status = container.wait(timeout=self.timeout)
            raw_logs = container.logs(stdout=True, stderr=True)
        finally:
            try:
                # force=True also kills a container that is still running
                # after a timeout.
                container.remove(force=True)
            except Exception:
                # Cleanup is best-effort; it must not mask the primary error.
                pass

        if isinstance(raw_logs, bytes):
            output = raw_logs.decode('utf-8', errors='replace')
        else:
            output = str(raw_logs)

        exit_code = status.get('StatusCode', 0) if isinstance(status, dict) else 0
        if exit_code != 0:
            # Include the tail of the logs so the caller sees the script error.
            raise RuntimeError(
                f"container exited with status {exit_code}: {output[-2000:]}"
            )
        return output

    @staticmethod
    def _find_json_object(text: str, start: int = 0, required_key: str = None) -> Dict[str, Any]:
        """Return the first JSON object found in ``text`` at or after ``start``.

        Works for both single-line and pretty-printed (multi-line) JSON that
        is surrounded by other log output.

        Args:
            text: Text to scan.
            start: Index to start scanning from.
            required_key: When given, only an object containing this key is
                accepted.

        Returns:
            The decoded object, or an empty dict when none is found.
        """
        decoder = json.JSONDecoder()
        brace = text.find('{', start)
        while brace != -1:
            try:
                candidate, _ = decoder.raw_decode(text, brace)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and (required_key is None or required_key in candidate):
                return candidate
            brace = text.find('{', brace + 1)
        return {}

    def download_youtube_video(self,
                               url: str,
                               audio_only: bool = None,
                               output_filename: str = None) -> Dict[str, Any]:
        """
        Download video or audio from YouTube URL

        Args:
            url: YouTube URL to download
            audio_only: If True, download only audio. If False, download video.
                When None, the configured ``default_audio_only`` is used.
            output_filename: Custom filename (optional)

        Returns:
            A dict with ``success``. On success it also holds ``output``,
            ``execution_time``, ``url``, ``audio_only``, ``download_info`` and
            ``temp_dir``; on failure it holds ``error`` and ``url``.
            ``temp_dir`` and ``download_info['path']`` refer to the temporary
            workspace, which is deleted before this method returns; use
            ``download_info['final_path']`` for the stored file.
        """
        import shlex  # local import: only needed to quote values for bash

        if audio_only is None:
            audio_only = self.default_audio_only

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                # Create output directory in temp
                container_output = temp_path / "output"
                container_output.mkdir()

                # Create the YouTube download script
                download_script = self._create_download_script(url, audio_only, output_filename)

                script_file = temp_path / "download_youtube.py"
                script_file.write_text(download_script)

                # Create execution script (no pip install needed).
                # Values are passed through shlex.quote so shell
                # metacharacters in the URL are printed, never executed.
                execution_script = f"""#!/bin/bash
set -e
cd /workspace

echo "Starting YouTube download..."
printf 'URL: %s\\n' {shlex.quote(str(url))}
printf 'Audio only: %s\\n' {shlex.quote(str(audio_only))}

# Execute the download directly (pytubefix should be pre-installed)
python download_youtube.py

echo "YouTube download completed successfully"
ls -la /workspace/output/
"""

                exec_script_file = temp_path / "run_download.sh"
                exec_script_file.write_text(execution_script)
                exec_script_file.chmod(0o755)

                start_time = time.time()

                try:
                    output = self._run_container(
                        temp_path,
                        ["bash", "/workspace/run_download.sh"],
                        environment={
                            'PYTHONUNBUFFERED': '1',
                            'PYTHONPATH': '/workspace'
                        },
                    )
                    execution_time = time.time() - start_time

                    # Check if output file was created (sorted so the choice
                    # of "first" file is deterministic)
                    output_files = sorted(container_output.glob("*"))
                    output_info = {}

                    if output_files:
                        downloaded_file = output_files[0]  # Take first output file
                        output_info = {
                            'filename': downloaded_file.name,
                            'size_bytes': downloaded_file.stat().st_size,
                            'path': str(downloaded_file)
                        }

                        # Move output file to permanent location
                        permanent_output = self.download_dir / downloaded_file.name
                        shutil.move(str(downloaded_file), str(permanent_output))
                        output_info['final_path'] = str(permanent_output)

                    # Merge the metadata the script printed after its
                    # "DOWNLOAD RESULT:" marker; the JSON is pretty-printed
                    # over several lines, so it cannot be parsed line by line.
                    marker = "DOWNLOAD RESULT:"
                    marker_pos = output.rfind(marker)
                    search_from = marker_pos + len(marker) if marker_pos != -1 else 0
                    output_info.update(
                        self._find_json_object(output, search_from, required_key='file_path')
                    )

                    return {
                        'success': True,
                        'output': output,
                        'execution_time': execution_time,
                        'url': url,
                        'audio_only': audio_only,
                        'download_info': output_info,
                        'temp_dir': str(temp_path)
                    }

                except Exception as container_error:
                    return {
                        'success': False,
                        'error': f"Container execution failed: {str(container_error)}",
                        'url': url,
                        'execution_time': time.time() - start_time
                    }

        except Exception as e:
            return {
                'success': False,
                'error': f"YouTube download setup failed: {str(e)}",
                'url': url
            }

    def _create_download_script(self, url: str, audio_only: bool, output_filename: str = None) -> str:
        """Create the Python script for downloading from YouTube.

        Args:
            url: YouTube URL, embedded as a quoted Python string literal.
            audio_only: Whether the script downloads audio only.
            output_filename: Optional base filename; embedded as a quoted
                literal, or ``None`` when empty.

        Returns:
            Source code of a standalone script that writes the download to
            ``/workspace/output`` and prints the result as JSON.
        """
        # repr() yields valid, fully escaped Python literals, so quotes or
        # newlines in the inputs cannot break out of the assignment.
        url_literal = repr(str(url))
        audio_only_literal = repr(bool(audio_only))
        filename_literal = repr(str(output_filename)) if output_filename else 'None'

        script = f'''#!/usr/bin/env python3
"""
YouTube downloader script using pytubefix
"""

import os
import json
import sys
from pathlib import Path

# Import required modules (should be pre-installed in container)
try:
    from pydantic import BaseModel, Field
    from pytubefix import YouTube
    from pytubefix.cli import on_progress
except ImportError as e:
    print(f"Import error: {{e}}", file=sys.stderr)
    print("Required packages not available in container", file=sys.stderr)
    sys.exit(1)


class DownloadResult(BaseModel):
    file_path: str = Field(..., description="Path where the downloaded file is stored.")
    title: str = Field(..., description="Sanitized title of the YouTube video.")
    url: str = Field(..., description="Original URL of the YouTube video.")
    thumbnail: str = Field(..., description="Thumbnail URL of the YouTube video.")
    duration: int = Field(..., description="Duration of the video in seconds.")
    file_size_bytes: int = Field(..., description="Size of the downloaded file in bytes.")


def sanitize_title(title: str) -> str:
    """Remove special characters from the title."""
    # Remove/replace problematic characters
    title = title.replace('/', '_')
    title = title.replace('\\\\', '_')
    title = title.replace(':', '_')
    title = title.replace('*', '_')
    title = title.replace('?', '_')
    title = title.replace('"', '_')
    title = title.replace('<', '_')
    title = title.replace('>', '_')
    title = title.replace('|', '_')

    # Keep only alphanumeric, spaces, hyphens, underscores
    sanitized = ''.join(c for c in title if c.isalnum() or c in ' -_')

    # Remove extra spaces and limit length
    sanitized = ' '.join(sanitized.split())[:100]

    return sanitized if sanitized else 'youtube_download'


def download_yt(url: str, audio_only: bool = True, output_dir: str = ".", custom_filename: str = None) -> DownloadResult:
    """Download audio or video from a YouTube URL."""
    try:
        # Create YouTube object
        yt = YouTube(url, on_progress_callback=on_progress)

        # Get video info
        title = sanitize_title(yt.title)
        duration = yt.length
        thumbnail_url = yt.thumbnail_url

        # Use custom filename if provided
        filename_base = custom_filename if custom_filename else title
        filename_base = sanitize_title(filename_base)

        if audio_only:
            # Get audio stream
            stream = yt.streams.filter(only_audio=True).first()
            if not stream:
                stream = yt.streams.get_audio_only()
            extension = "mp3"
        else:
            # Get highest resolution video stream
            stream = yt.streams.get_highest_resolution()
            if not stream:
                stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
            extension = "mp4"

        if stream is None:
            raise RuntimeError("No downloadable stream found for this video")

        filename = filename_base + "." + extension
        file_path = stream.download(output_path=output_dir, filename=filename)

        # Get file size
        file_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0

        return DownloadResult(
            file_path=file_path,
            title=title,
            url=url,
            thumbnail=thumbnail_url,
            duration=duration,
            file_size_bytes=file_size
        )

    except Exception as e:
        print(f"Error downloading {{url}}: {{e}}", file=sys.stderr)
        raise


if __name__ == '__main__':
    try:
        url = {url_literal}
        audio_only = {audio_only_literal}  # This will be True or False, not string
        output_dir = "/workspace/output"
        custom_filename = {filename_literal}

        print(f"Downloading from: {{url}}")
        print(f"Audio only: {{audio_only}}")
        print(f"Output directory: {{output_dir}}")

        # Perform download
        result = download_yt(
            url=url,
            audio_only=audio_only,
            output_dir=output_dir,
            custom_filename=custom_filename
        )

        # Output result as JSON for parsing
        print("\\n" + "="*50)
        print("DOWNLOAD RESULT:")
        print(result.model_dump_json(indent=2))
        print("="*50)

    except Exception as e:
        print(f"Download failed: {{e}}", file=sys.stderr)
        sys.exit(1)
'''

        return script

    def get_video_info(self, url: str) -> Dict[str, Any]:
        """Get video information without downloading.

        Args:
            url: YouTube URL to inspect.

        Returns:
            ``{'success': True, 'video_info': {...}, 'url': url}`` on success.
            On failure ``success`` is False and ``error`` describes the
            problem; ``raw_output`` is included when the container ran but
            its output could not be parsed.
        """
        # repr() embeds the URL as an escaped literal so it cannot inject code.
        url_literal = repr(str(url))

        info_script = f'''#!/usr/bin/env python3
import json
import sys

try:
    from pytubefix import YouTube
except ImportError as e:
    print(f"Error: pytubefix not available: {{e}}", file=sys.stderr)
    sys.exit(1)

try:
    yt = YouTube({url_literal})

    info = {{
        "title": yt.title,
        "duration": yt.length,
        "views": yt.views,
        "thumbnail_url": yt.thumbnail_url,
        "description": yt.description[:500] + "..." if len(yt.description) > 500 else yt.description,
        "author": yt.author,
        "publish_date": yt.publish_date.isoformat() if yt.publish_date else None,
        "available_streams": {{
            "audio_streams": len(yt.streams.filter(only_audio=True)),
            "video_streams": len(yt.streams.filter(progressive=True)),
            "highest_resolution": str(yt.streams.get_highest_resolution()),
            "audio_only": str(yt.streams.get_audio_only())
        }}
    }}

    print(json.dumps(info, indent=2))

except Exception as e:
    print(f"Error getting video info: {{e}}", file=sys.stderr)
    sys.exit(1)
'''

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                script_file = temp_path / "get_info.py"
                script_file.write_text(info_script)

                output = self._run_container(temp_path, ["python", "/workspace/get_info.py"])

                try:
                    video_info = json.loads(output.strip())
                except json.JSONDecodeError:
                    # stderr is mixed into the logs, so warnings may surround
                    # the JSON; fall back to extracting the object itself.
                    video_info = self._find_json_object(output, required_key='title')

                if video_info:
                    return {
                        'success': True,
                        'video_info': video_info,
                        'url': url
                    }
                return {
                    'success': False,
                    'error': 'Failed to parse video info',
                    'raw_output': output
                }

        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'url': url
            }
|