camel-ai 0.2.23a0__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +16 -2
- camel/configs/anthropic_config.py +45 -11
- camel/datagen/self_improving_cot.py +2 -2
- camel/datagen/self_instruct/self_instruct.py +46 -2
- camel/models/__init__.py +2 -0
- camel/models/anthropic_model.py +5 -1
- camel/models/base_audio_model.py +92 -0
- camel/models/fish_audio_model.py +18 -8
- camel/models/model_manager.py +9 -0
- camel/models/openai_audio_models.py +80 -1
- camel/societies/role_playing.py +119 -0
- camel/toolkits/__init__.py +17 -1
- camel/toolkits/audio_analysis_toolkit.py +238 -0
- camel/toolkits/excel_toolkit.py +172 -0
- camel/toolkits/file_write_toolkit.py +371 -0
- camel/toolkits/image_analysis_toolkit.py +202 -0
- camel/toolkits/mcp_toolkit.py +251 -0
- camel/toolkits/page_script.js +376 -0
- camel/toolkits/terminal_toolkit.py +421 -0
- camel/toolkits/video_analysis_toolkit.py +407 -0
- camel/toolkits/{video_toolkit.py → video_download_toolkit.py} +19 -25
- camel/toolkits/web_toolkit.py +1306 -0
- camel/types/enums.py +3 -0
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info}/METADATA +241 -106
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info}/RECORD +57 -47
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.23a0.dist-info → camel_ai-0.2.24.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import tempfile
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Optional
|
|
19
|
+
|
|
20
|
+
from PIL import Image
|
|
21
|
+
|
|
22
|
+
from camel.logger import get_logger
|
|
23
|
+
from camel.messages import BaseMessage
|
|
24
|
+
from camel.models import BaseModelBackend, OpenAIAudioModels
|
|
25
|
+
from camel.toolkits.base import BaseToolkit
|
|
26
|
+
from camel.toolkits.function_tool import FunctionTool
|
|
27
|
+
from camel.utils import dependencies_required
|
|
28
|
+
|
|
29
|
+
from .video_download_toolkit import (
|
|
30
|
+
VideoDownloaderToolkit,
|
|
31
|
+
_capture_screenshot,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
logger = get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
VIDEO_QA_PROMPT = """
|
|
37
|
+
Analyze the provided video frames and corresponding audio transcription to \
|
|
38
|
+
answer the given question(s) thoroughly and accurately.
|
|
39
|
+
|
|
40
|
+
Instructions:
|
|
41
|
+
1. Visual Analysis:
|
|
42
|
+
- Examine the video frames to identify visible entities.
|
|
43
|
+
- Differentiate objects, species, or features based on key attributes \
|
|
44
|
+
such as size, color, shape, texture, or behavior.
|
|
45
|
+
- Note significant groupings, interactions, or contextual patterns \
|
|
46
|
+
relevant to the analysis.
|
|
47
|
+
|
|
48
|
+
2. Audio Integration:
|
|
49
|
+
- Use the audio transcription to complement or clarify your visual \
|
|
50
|
+
observations.
|
|
51
|
+
- Identify names, descriptions, or contextual hints in the \
|
|
52
|
+
transcription that help confirm or refine your visual analysis.
|
|
53
|
+
|
|
54
|
+
3. Detailed Reasoning and Justification:
|
|
55
|
+
- Provide a brief explanation of how you identified and distinguished \
|
|
56
|
+
each species or object.
|
|
57
|
+
- Highlight specific features or contextual clues that informed \
|
|
58
|
+
your reasoning.
|
|
59
|
+
|
|
60
|
+
4. Comprehensive Answer:
|
|
61
|
+
- Specify the total number of distinct species or object types \
|
|
62
|
+
identified in the video.
|
|
63
|
+
- Describe the defining characteristics and any supporting evidence \
|
|
64
|
+
from the video and transcription.
|
|
65
|
+
|
|
66
|
+
5. Important Considerations:
|
|
67
|
+
- Pay close attention to subtle differences that could distinguish \
|
|
68
|
+
similar-looking species or objects
|
|
69
|
+
(e.g., juveniles vs. adults, closely related species).
|
|
70
|
+
- Provide concise yet complete explanations to ensure clarity.
|
|
71
|
+
|
|
72
|
+
**Audio Transcription:**
|
|
73
|
+
{audio_transcription}
|
|
74
|
+
|
|
75
|
+
**Question:**
|
|
76
|
+
{question}
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class VideoAnalysisToolkit(BaseToolkit):
    r"""A class for analysing videos with vision-language model.

    Args:
        download_directory (Optional[str], optional): The directory where the
            video will be downloaded to. If not provided, video will be stored
            in a temporary directory and will be cleaned up after use.
            (default: :obj:`None`)
        model (Optional[BaseModelBackend], optional): The model to use for
            visual analysis. (default: :obj:`None`)
        use_audio_transcription (bool, optional): Whether to enable audio
            transcription using OpenAI's audio models. Requires a valid OpenAI
            API key. When disabled, video analysis will be based solely on
            visual content. (default: :obj:`False`)
    """

    @dependencies_required("ffmpeg", "scenedetect")
    def __init__(
        self,
        download_directory: Optional[str] = None,
        model: Optional[BaseModelBackend] = None,
        use_audio_transcription: bool = False,
    ) -> None:
        # Only remove the download directory on destruction if we created it
        # ourselves (i.e. the caller did not supply one).
        self._cleanup = download_directory is None
        self._temp_files: list[str] = []  # Track temporary files for cleanup
        self._use_audio_transcription = use_audio_transcription

        self._download_directory = Path(
            download_directory or tempfile.mkdtemp()
        ).resolve()

        self.video_downloader_toolkit = VideoDownloaderToolkit(
            download_directory=str(self._download_directory)
        )

        try:
            self._download_directory.mkdir(parents=True, exist_ok=True)
        except FileExistsError as e:
            # Raised when the path exists but is not a directory.
            raise ValueError(
                f"{self._download_directory} is not a valid directory."
            ) from e
        except OSError as e:
            raise ValueError(
                f"Error creating directory {self._download_directory}: {e}"
            ) from e

        logger.info(f"Video will be downloaded to {self._download_directory}")

        self.vl_model = model
        # Import ChatAgent at runtime to avoid circular imports.
        from camel.agents import ChatAgent

        if self.vl_model:
            self.vl_agent = ChatAgent(model=self.vl_model)
        else:
            # If no model is provided, use default model in ChatAgent.
            self.vl_agent = ChatAgent()
            logger.warning(
                "No vision-language model provided. Using default model in"
                " ChatAgent."
            )

        # Initialize audio models only if audio transcription is enabled.
        self.audio_models = None
        if self._use_audio_transcription:
            try:
                self.audio_models = OpenAIAudioModels()
            except Exception as e:
                logger.warning(
                    f"Failed to initialize OpenAIAudioModels: {e}. "
                    "Audio transcription will be disabled."
                )
                self._use_audio_transcription = False

    def __del__(self):
        r"""Clean up temporary directories and files when the object is
        destroyed.

        Uses ``getattr`` guards because ``__del__`` runs even when
        ``__init__`` raised before all attributes were assigned; without the
        guards a failed construction would raise ``AttributeError`` during
        garbage collection.
        """
        # Clean up temporary files.
        for temp_file in getattr(self, '_temp_files', []):
            if os.path.exists(temp_file):
                try:
                    os.remove(temp_file)
                    logger.debug(f"Removed temporary file: {temp_file}")
                except OSError as e:
                    logger.warning(
                        f"Failed to remove temporary file {temp_file}: {e}"
                    )

        # Clean up temporary directory if needed.
        download_directory = getattr(self, '_download_directory', None)
        if (
            getattr(self, '_cleanup', False)
            and download_directory is not None
            and os.path.exists(download_directory)
        ):
            try:
                import shutil

                shutil.rmtree(download_directory)
                logger.debug(
                    f"Removed temporary directory: {download_directory}"
                )
            except OSError as e:
                logger.warning(
                    f"Failed to remove temporary directory"
                    f" {download_directory}: {e}"
                )

    def _extract_audio_from_video(
        self, video_path: str, output_format: str = "mp3"
    ) -> str:
        r"""Extract audio from the video.

        Args:
            video_path (str): The path to the video file.
            output_format (str): The format of the audio file to be saved.
                (default: :obj:`"mp3"`)

        Returns:
            str: The path to the audio file.

        Raises:
            RuntimeError: If ffmpeg fails to extract the audio.
        """
        import ffmpeg

        # Handle case where video file doesn't have an extension.
        base_path = os.path.splitext(video_path)[0]
        output_path = f"{base_path}.{output_format}"

        # Choose an encoder matching the requested container. Previously
        # "libmp3lame" was hard-coded regardless of `output_format`, which
        # produced MP3-encoded data for non-mp3 formats. Unknown formats let
        # ffmpeg pick its default encoder for that container.
        codec_by_format = {"mp3": "libmp3lame", "wav": "pcm_s16le"}
        output_kwargs: dict = {"vn": None}  # vn=None drops the video stream
        codec = codec_by_format.get(output_format)
        if codec is not None:
            output_kwargs["acodec"] = codec

        try:
            (
                ffmpeg.input(video_path)
                .output(output_path, **output_kwargs)
                .run(quiet=True)
            )
            # Track the audio file for cleanup.
            self._temp_files.append(output_path)
            return output_path
        except ffmpeg.Error as e:
            error_message = f"FFmpeg-Python failed: {e}"
            logger.error(error_message)
            raise RuntimeError(error_message) from e

    def _transcribe_audio(self, audio_path: str) -> str:
        r"""Transcribe the audio of the video.

        Args:
            audio_path (str): The path to the audio file.

        Returns:
            str: The transcription text, or a fallback message when
                transcription is disabled, unavailable, empty, or fails.
        """
        # Check if audio transcription is enabled and audio models are
        # available.
        if not self._use_audio_transcription or self.audio_models is None:
            logger.warning("Audio transcription is disabled or not available")
            return "No audio transcription available."

        try:
            audio_transcript = self.audio_models.speech_to_text(audio_path)
            if not audio_transcript:
                logger.warning("Audio transcription returned empty result")
                return "No audio transcription available."
            return audio_transcript
        except Exception as e:
            # Best-effort: a failed transcription degrades to visual-only
            # analysis instead of aborting the whole request.
            logger.error(f"Audio transcription failed: {e}")
            return "Audio transcription failed."

    def _extract_keyframes(
        self, video_path: str, num_frames: int, threshold: float = 25.0
    ) -> List[Image.Image]:
        r"""Extract keyframes from a video based on scene changes
        and return them as PIL.Image.Image objects.

        Args:
            video_path (str): Path to the video file.
            num_frames (int): Number of keyframes to extract.
            threshold (float): The threshold value for scene change detection.

        Returns:
            list: A list of PIL.Image.Image objects representing
                the extracted keyframes.

        Raises:
            ValueError: If no keyframes could be extracted.
        """
        from scenedetect import (  # type: ignore[import-untyped]
            SceneManager,
            VideoManager,
        )
        from scenedetect.detectors import (  # type: ignore[import-untyped]
            ContentDetector,
        )

        if num_frames <= 0:
            logger.warning(
                f"Invalid num_frames: {num_frames}, using default of 1"
            )
            num_frames = 1

        video_manager = VideoManager([video_path])
        scene_manager = SceneManager()
        scene_manager.add_detector(ContentDetector(threshold=threshold))

        video_manager.set_duration()
        video_manager.start()
        scene_manager.detect_scenes(video_manager)

        scenes = scene_manager.get_scene_list()
        keyframes: List[Image.Image] = []

        # Handle case where no scenes are detected.
        if not scenes:
            logger.warning(
                "No scenes detected in video, capturing frames at "
                "regular intervals"
            )
            import cv2

            cap = cv2.VideoCapture(video_path)
            try:
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                fps = cap.get(cv2.CAP_PROP_FPS)
                duration = total_frames / fps if fps > 0 else 0
            finally:
                # Release the capture even when metadata is unavailable;
                # previously the handle leaked if fps/frame count were 0.
                cap.release()

            if duration > 0 and total_frames > 0:
                # Extract frames at regular intervals.
                frame_count = min(num_frames, total_frames)
                interval = duration / frame_count
                for i in range(frame_count):
                    frame = _capture_screenshot(video_path, i * interval)
                    keyframes.append(frame)
        else:
            # Extract frames from detected scenes (one per scene start).
            for start_time, _ in scenes:
                if len(keyframes) >= num_frames:
                    break
                frame = _capture_screenshot(video_path, start_time)
                keyframes.append(frame)

        if not keyframes:
            logger.error("Failed to extract any keyframes from video")
            raise ValueError("Failed to extract keyframes from video")

        logger.info(f"Extracted {len(keyframes)} keyframes")
        return keyframes

    def ask_question_about_video(
        self,
        video_path: str,
        question: str,
        num_frames: int = 28,
    ) -> str:
        r"""Ask a question about the video.

        Args:
            video_path (str): The path to the video file.
                It can be a local file or a URL (such as Youtube website).
            question (str): The question to ask about the video.
            num_frames (int): The number of frames to extract from the video.
                To be adjusted based on the length of the video.
                (default: :obj:`28`)

        Returns:
            str: The answer to the question, or an error message string on
                failure (errors are caught and reported, not raised).
        """
        from urllib.parse import urlparse

        if not question:
            raise ValueError("Question cannot be empty")

        if num_frames <= 0:
            logger.warning(
                f"Invalid num_frames: {num_frames}, using default of 28"
            )
            num_frames = 28

        # Treat the path as a URL only when both a scheme and a host are
        # present (plain local paths have neither).
        parsed_url = urlparse(video_path)
        is_url = all([parsed_url.scheme, parsed_url.netloc])

        downloaded_video_path = None
        try:
            if is_url:
                downloaded_video_path = (
                    self.video_downloader_toolkit.download_video(video_path)
                )
                if not downloaded_video_path or not os.path.exists(
                    downloaded_video_path
                ):
                    raise ValueError(
                        f"Failed to download video from {video_path}"
                    )
                video_path = downloaded_video_path

            if not os.path.exists(video_path):
                raise FileNotFoundError(f"Video file not found: {video_path}")

            audio_transcript = "No audio transcription available."
            if self._use_audio_transcription:
                audio_path = self._extract_audio_from_video(video_path)
                audio_transcript = self._transcribe_audio(audio_path)

            video_frames = self._extract_keyframes(video_path, num_frames)
            prompt = VIDEO_QA_PROMPT.format(
                audio_transcription=audio_transcript,
                question=question,
            )

            msg = BaseMessage.make_user_message(
                role_name="User",
                content=prompt,
                image_list=video_frames,
            )

            response = self.vl_agent.step(msg)
            if not response or not response.msgs:
                logger.error("Model returned empty response")
                return (
                    "Failed to generate an answer. "
                    "The model returned an empty response."
                )

            answer = response.msgs[0].content
            return answer

        except Exception as e:
            # Tool entry point: report failures as a string so agent loops
            # receive a usable observation instead of an exception.
            error_message = f"Error processing video: {e!s}"
            logger.error(error_message)
            return f"Error: {error_message}"

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects representing
                the functions in the toolkit.
        """
        return [FunctionTool(self.ask_question_about_video)]
|
@@ -13,32 +13,19 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
|
|
15
15
|
import io
|
|
16
|
-
import logging
|
|
17
|
-
import re
|
|
18
16
|
import tempfile
|
|
19
17
|
from pathlib import Path
|
|
20
18
|
from typing import List, Optional
|
|
19
|
+
from urllib.parse import urlparse
|
|
21
20
|
|
|
22
21
|
from PIL import Image
|
|
23
22
|
|
|
23
|
+
from camel.logger import get_logger
|
|
24
24
|
from camel.toolkits.base import BaseToolkit
|
|
25
25
|
from camel.toolkits.function_tool import FunctionTool
|
|
26
26
|
from camel.utils import dependencies_required
|
|
27
27
|
|
|
28
|
-
logger =
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def _standardize_url(url: str) -> str:
|
|
32
|
-
r"""Standardize the given URL."""
|
|
33
|
-
# Special case for YouTube embed URLs
|
|
34
|
-
if "youtube.com/embed/" in url:
|
|
35
|
-
match = re.search(r"embed/([a-zA-Z0-9_-]+)", url)
|
|
36
|
-
if match:
|
|
37
|
-
return f"https://www.youtube.com/watch?v={match.group(1)}"
|
|
38
|
-
else:
|
|
39
|
-
raise ValueError(f"Invalid YouTube URL: {url}")
|
|
40
|
-
|
|
41
|
-
return url
|
|
28
|
+
logger = get_logger(__name__)
|
|
42
29
|
|
|
43
30
|
|
|
44
31
|
def _capture_screenshot(video_file: str, timestamp: float) -> Image.Image:
|
|
@@ -119,7 +106,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
119
106
|
if self._cleanup:
|
|
120
107
|
shutil.rmtree(self._download_directory, ignore_errors=True)
|
|
121
108
|
|
|
122
|
-
def
|
|
109
|
+
def download_video(self, url: str) -> str:
|
|
123
110
|
r"""Download the video and optionally split it into chunks.
|
|
124
111
|
|
|
125
112
|
yt-dlp will detect if the video is downloaded automatically so there
|
|
@@ -149,18 +136,21 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
149
136
|
|
|
150
137
|
def get_video_bytes(
|
|
151
138
|
self,
|
|
152
|
-
|
|
139
|
+
video_path: str,
|
|
153
140
|
) -> bytes:
|
|
154
|
-
r"""Download video by the
|
|
141
|
+
r"""Download video by the path, and return the content in bytes.
|
|
155
142
|
|
|
156
143
|
Args:
|
|
157
|
-
|
|
144
|
+
video_path (str): The path to the video file.
|
|
158
145
|
|
|
159
146
|
Returns:
|
|
160
147
|
bytes: The video file content in bytes.
|
|
161
148
|
"""
|
|
162
|
-
|
|
163
|
-
|
|
149
|
+
parsed_url = urlparse(video_path)
|
|
150
|
+
is_url = all([parsed_url.scheme, parsed_url.netloc])
|
|
151
|
+
if is_url:
|
|
152
|
+
video_path = self.download_video(video_path)
|
|
153
|
+
video_file = video_path
|
|
164
154
|
|
|
165
155
|
with open(video_file, 'rb') as f:
|
|
166
156
|
video_bytes = f.read()
|
|
@@ -168,7 +158,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
168
158
|
return video_bytes
|
|
169
159
|
|
|
170
160
|
def get_video_screenshots(
|
|
171
|
-
self,
|
|
161
|
+
self, video_path: str, amount: int
|
|
172
162
|
) -> List[Image.Image]:
|
|
173
163
|
r"""Capture screenshots from the video at specified timestamps or by
|
|
174
164
|
dividing the video into equal parts if an integer is provided.
|
|
@@ -182,8 +172,11 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
182
172
|
"""
|
|
183
173
|
import ffmpeg
|
|
184
174
|
|
|
185
|
-
|
|
186
|
-
|
|
175
|
+
parsed_url = urlparse(video_path)
|
|
176
|
+
is_url = all([parsed_url.scheme, parsed_url.netloc])
|
|
177
|
+
if is_url:
|
|
178
|
+
video_path = self.download_video(video_path)
|
|
179
|
+
video_file = video_path
|
|
187
180
|
|
|
188
181
|
# Get the video length
|
|
189
182
|
try:
|
|
@@ -208,6 +201,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
208
201
|
the functions in the toolkit.
|
|
209
202
|
"""
|
|
210
203
|
return [
|
|
204
|
+
FunctionTool(self.download_video),
|
|
211
205
|
FunctionTool(self.get_video_bytes),
|
|
212
206
|
FunctionTool(self.get_video_screenshots),
|
|
213
207
|
]
|