camel-ai 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +41 -0
- camel/agents/_utils.py +188 -0
- camel/agents/chat_agent.py +570 -965
- camel/agents/knowledge_graph_agent.py +7 -1
- camel/agents/multi_hop_generator_agent.py +1 -1
- camel/configs/base_config.py +10 -13
- camel/configs/deepseek_config.py +4 -30
- camel/configs/gemini_config.py +5 -31
- camel/configs/openai_config.py +14 -32
- camel/configs/qwen_config.py +36 -36
- camel/datagen/self_improving_cot.py +81 -3
- camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
- camel/datagen/self_instruct/self_instruct.py +52 -3
- camel/datasets/__init__.py +28 -0
- camel/datasets/base.py +969 -0
- camel/environments/__init__.py +16 -0
- camel/environments/base.py +503 -0
- camel/extractors/__init__.py +16 -0
- camel/extractors/base.py +263 -0
- camel/memories/agent_memories.py +16 -1
- camel/memories/blocks/chat_history_block.py +10 -2
- camel/memories/blocks/vectordb_block.py +1 -0
- camel/memories/context_creators/score_based.py +20 -3
- camel/memories/records.py +10 -0
- camel/messages/base.py +8 -8
- camel/models/__init__.py +2 -0
- camel/models/_utils.py +57 -0
- camel/models/aiml_model.py +48 -17
- camel/models/anthropic_model.py +41 -3
- camel/models/azure_openai_model.py +39 -3
- camel/models/base_audio_model.py +92 -0
- camel/models/base_model.py +88 -13
- camel/models/cohere_model.py +88 -11
- camel/models/deepseek_model.py +107 -45
- camel/models/fish_audio_model.py +18 -8
- camel/models/gemini_model.py +133 -15
- camel/models/groq_model.py +72 -10
- camel/models/internlm_model.py +14 -3
- camel/models/litellm_model.py +9 -2
- camel/models/mistral_model.py +42 -5
- camel/models/model_manager.py +57 -3
- camel/models/moonshot_model.py +33 -4
- camel/models/nemotron_model.py +32 -3
- camel/models/nvidia_model.py +43 -3
- camel/models/ollama_model.py +139 -17
- camel/models/openai_audio_models.py +87 -2
- camel/models/openai_compatible_model.py +37 -3
- camel/models/openai_model.py +158 -46
- camel/models/qwen_model.py +61 -4
- camel/models/reka_model.py +53 -3
- camel/models/samba_model.py +209 -4
- camel/models/sglang_model.py +153 -14
- camel/models/siliconflow_model.py +16 -3
- camel/models/stub_model.py +46 -4
- camel/models/togetherai_model.py +38 -3
- camel/models/vllm_model.py +37 -3
- camel/models/yi_model.py +36 -3
- camel/models/zhipuai_model.py +38 -3
- camel/retrievers/__init__.py +3 -0
- camel/retrievers/hybrid_retrival.py +237 -0
- camel/toolkits/__init__.py +15 -1
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +4 -2
- camel/toolkits/audio_analysis_toolkit.py +238 -0
- camel/toolkits/base.py +22 -3
- camel/toolkits/code_execution.py +2 -0
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +38 -12
- camel/toolkits/excel_toolkit.py +172 -0
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/github_toolkit.py +5 -1
- camel/toolkits/google_maps_toolkit.py +2 -1
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +0 -3
- camel/toolkits/image_analysis_toolkit.py +202 -0
- camel/toolkits/linkedin_toolkit.py +3 -2
- camel/toolkits/meshy_toolkit.py +3 -2
- camel/toolkits/mineru_toolkit.py +2 -2
- camel/toolkits/networkx_toolkit.py +240 -0
- camel/toolkits/notion_toolkit.py +2 -0
- camel/toolkits/openbb_toolkit.py +3 -2
- camel/toolkits/page_script.js +376 -0
- camel/toolkits/reddit_toolkit.py +11 -3
- camel/toolkits/retrieval_toolkit.py +6 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -1
- camel/toolkits/stripe_toolkit.py +8 -2
- camel/toolkits/sympy_toolkit.py +6 -1
- camel/toolkits/video_analysis_toolkit.py +407 -0
- camel/toolkits/{video_toolkit.py → video_download_toolkit.py} +21 -25
- camel/toolkits/web_toolkit.py +1307 -0
- camel/toolkits/whatsapp_toolkit.py +3 -2
- camel/toolkits/zapier_toolkit.py +191 -0
- camel/types/__init__.py +2 -2
- camel/types/agents/__init__.py +16 -0
- camel/types/agents/tool_calling_record.py +52 -0
- camel/types/enums.py +3 -0
- camel/types/openai_types.py +16 -14
- camel/utils/__init__.py +2 -1
- camel/utils/async_func.py +2 -2
- camel/utils/commons.py +114 -1
- camel/verifiers/__init__.py +23 -0
- camel/verifiers/base.py +340 -0
- camel/verifiers/models.py +82 -0
- camel/verifiers/python_verifier.py +202 -0
- camel_ai-0.2.23.dist-info/METADATA +671 -0
- {camel_ai-0.2.22.dist-info → camel_ai-0.2.23.dist-info}/RECORD +122 -97
- {camel_ai-0.2.22.dist-info → camel_ai-0.2.23.dist-info}/WHEEL +1 -1
- camel_ai-0.2.22.dist-info/METADATA +0 -527
- {camel_ai-0.2.22.dist-info → camel_ai-0.2.23.dist-info/licenses}/LICENSE +0 -0
camel/toolkits/reddit_toolkit.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import os
|
|
16
16
|
import time
|
|
17
|
-
from typing import Any, Dict, List, Union
|
|
17
|
+
from typing import Any, Dict, List, Optional, Union
|
|
18
18
|
|
|
19
19
|
from camel.toolkits import FunctionTool
|
|
20
20
|
from camel.toolkits.base import BaseToolkit
|
|
@@ -30,11 +30,16 @@ class RedditToolkit(BaseToolkit):
|
|
|
30
30
|
|
|
31
31
|
Attributes:
|
|
32
32
|
retries (int): Number of retries for API requests in case of failure.
|
|
33
|
-
delay (
|
|
33
|
+
delay (float): Delay between retries in seconds.
|
|
34
34
|
reddit (Reddit): An instance of the Reddit client.
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
|
-
def __init__(
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
retries: int = 3,
|
|
40
|
+
delay: float = 0.0,
|
|
41
|
+
timeout: Optional[float] = None,
|
|
42
|
+
):
|
|
38
43
|
r"""Initializes the RedditToolkit with the specified number of retries
|
|
39
44
|
and delay.
|
|
40
45
|
|
|
@@ -43,7 +48,10 @@ class RedditToolkit(BaseToolkit):
|
|
|
43
48
|
failure. Defaults to `3`.
|
|
44
49
|
delay (int): Time in seconds to wait between retries. Defaults to
|
|
45
50
|
`0`.
|
|
51
|
+
timeout (float): Timeout for API requests in seconds. Defaults to
|
|
52
|
+
`None`.
|
|
46
53
|
"""
|
|
54
|
+
super().__init__(timeout=timeout)
|
|
47
55
|
from praw import Reddit # type: ignore[import-untyped]
|
|
48
56
|
|
|
49
57
|
self.retries = retries
|
camel/toolkits/retrieval_toolkit.py
CHANGED
|
@@ -27,8 +27,13 @@ class RetrievalToolkit(BaseToolkit):
|
|
|
27
27
|
storage system based on a specified query.
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
|
-
def __init__(
|
|
30
|
+
def __init__(
|
|
31
|
+
self,
|
|
32
|
+
auto_retriever: Optional[AutoRetriever] = None,
|
|
33
|
+
timeout: Optional[float] = None,
|
|
34
|
+
) -> None:
|
|
31
35
|
r"""Initializes a new instance of the RetrievalToolkit class."""
|
|
36
|
+
super().__init__(timeout=timeout)
|
|
32
37
|
self.ar = auto_retriever or AutoRetriever(
|
|
33
38
|
vector_storage_local_path="camel/temp_storage",
|
|
34
39
|
storage_type=StorageType.QDRANT,
|
camel/toolkits/semantic_scholar_toolkit.py
CHANGED
|
@@ -26,8 +26,9 @@ class SemanticScholarToolkit(BaseToolkit):
|
|
|
26
26
|
API to fetch paper and author data.
|
|
27
27
|
"""
|
|
28
28
|
|
|
29
|
-
def __init__(self):
|
|
29
|
+
def __init__(self, timeout: Optional[float] = None):
|
|
30
30
|
r"""Initializes the SemanticScholarToolkit."""
|
|
31
|
+
super().__init__(timeout=timeout)
|
|
31
32
|
self.base_url = "https://api.semanticscholar.org/graph/v1"
|
|
32
33
|
|
|
33
34
|
def fetch_paper_data_title(
|
camel/toolkits/stripe_toolkit.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import json
|
|
16
16
|
import logging
|
|
17
17
|
import os
|
|
18
|
-
from typing import List
|
|
18
|
+
from typing import List, Optional
|
|
19
19
|
|
|
20
20
|
from camel.toolkits import FunctionTool
|
|
21
21
|
from camel.toolkits.base import BaseToolkit
|
|
@@ -41,7 +41,12 @@ class StripeToolkit(BaseToolkit):
|
|
|
41
41
|
(None, "STRIPE_API_KEY"),
|
|
42
42
|
]
|
|
43
43
|
)
|
|
44
|
-
def __init__(
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
retries: int = 3,
|
|
47
|
+
timeout: Optional[float] = None,
|
|
48
|
+
):
|
|
49
|
+
super().__init__(timeout=timeout)
|
|
45
50
|
r"""Initializes the StripeToolkit with the specified number of
|
|
46
51
|
retries.
|
|
47
52
|
|
|
@@ -49,6 +54,7 @@ class StripeToolkit(BaseToolkit):
|
|
|
49
54
|
retries (int,optional): Number of times to retry the request in
|
|
50
55
|
case of failure. (default: :obj:`3`)
|
|
51
56
|
"""
|
|
57
|
+
super().__init__(timeout=timeout)
|
|
52
58
|
import stripe
|
|
53
59
|
|
|
54
60
|
stripe.max_network_retries = retries
|
camel/toolkits/sympy_toolkit.py
CHANGED
|
@@ -28,13 +28,18 @@ class SymPyToolkit(BaseToolkit):
|
|
|
28
28
|
and Linear Algebra.
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
|
-
def __init__(
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
default_variable: str = 'x',
|
|
34
|
+
timeout: Optional[float] = None,
|
|
35
|
+
):
|
|
32
36
|
r"""Initializes the toolkit with a default variable and logging.
|
|
33
37
|
|
|
34
38
|
Args:
|
|
35
39
|
default_variable (str): The default variable for
|
|
36
40
|
operations (default: :obj: `x`)
|
|
37
41
|
"""
|
|
42
|
+
super().__init__(timeout=timeout)
|
|
38
43
|
self.default_variable = default_variable
|
|
39
44
|
logger.info(f"Default variable set to: {self.default_variable}")
|
|
40
45
|
|
camel/toolkits/video_analysis_toolkit.py
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import tempfile
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Optional
|
|
19
|
+
|
|
20
|
+
from PIL import Image
|
|
21
|
+
|
|
22
|
+
from camel.logger import get_logger
|
|
23
|
+
from camel.messages import BaseMessage
|
|
24
|
+
from camel.models import BaseModelBackend, OpenAIAudioModels
|
|
25
|
+
from camel.toolkits.base import BaseToolkit
|
|
26
|
+
from camel.toolkits.function_tool import FunctionTool
|
|
27
|
+
from camel.utils import dependencies_required
|
|
28
|
+
|
|
29
|
+
from .video_download_toolkit import (
|
|
30
|
+
VideoDownloaderToolkit,
|
|
31
|
+
_capture_screenshot,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
logger = get_logger(__name__)
|
|
35
|
+
|
|
36
|
+
VIDEO_QA_PROMPT = """
|
|
37
|
+
Analyze the provided video frames and corresponding audio transcription to \
|
|
38
|
+
answer the given question(s) thoroughly and accurately.
|
|
39
|
+
|
|
40
|
+
Instructions:
|
|
41
|
+
1. Visual Analysis:
|
|
42
|
+
- Examine the video frames to identify visible entities.
|
|
43
|
+
- Differentiate objects, species, or features based on key attributes \
|
|
44
|
+
such as size, color, shape, texture, or behavior.
|
|
45
|
+
- Note significant groupings, interactions, or contextual patterns \
|
|
46
|
+
relevant to the analysis.
|
|
47
|
+
|
|
48
|
+
2. Audio Integration:
|
|
49
|
+
- Use the audio transcription to complement or clarify your visual \
|
|
50
|
+
observations.
|
|
51
|
+
- Identify names, descriptions, or contextual hints in the \
|
|
52
|
+
transcription that help confirm or refine your visual analysis.
|
|
53
|
+
|
|
54
|
+
3. Detailed Reasoning and Justification:
|
|
55
|
+
- Provide a brief explanation of how you identified and distinguished \
|
|
56
|
+
each species or object.
|
|
57
|
+
- Highlight specific features or contextual clues that informed \
|
|
58
|
+
your reasoning.
|
|
59
|
+
|
|
60
|
+
4. Comprehensive Answer:
|
|
61
|
+
- Specify the total number of distinct species or object types \
|
|
62
|
+
identified in the video.
|
|
63
|
+
- Describe the defining characteristics and any supporting evidence \
|
|
64
|
+
from the video and transcription.
|
|
65
|
+
|
|
66
|
+
5. Important Considerations:
|
|
67
|
+
- Pay close attention to subtle differences that could distinguish \
|
|
68
|
+
similar-looking species or objects
|
|
69
|
+
(e.g., juveniles vs. adults, closely related species).
|
|
70
|
+
- Provide concise yet complete explanations to ensure clarity.
|
|
71
|
+
|
|
72
|
+
**Audio Transcription:**
|
|
73
|
+
{audio_transcription}
|
|
74
|
+
|
|
75
|
+
**Question:**
|
|
76
|
+
{question}
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class VideoAnalysisToolkit(BaseToolkit):
|
|
81
|
+
r"""A class for analysing videos with vision-language model.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
download_directory (Optional[str], optional): The directory where the
|
|
85
|
+
video will be downloaded to. If not provided, video will be stored
|
|
86
|
+
in a temporary directory and will be cleaned up after use.
|
|
87
|
+
(default: :obj:`None`)
|
|
88
|
+
model (Optional[BaseModelBackend], optional): The model to use for
|
|
89
|
+
visual analysis. (default: :obj:`None`)
|
|
90
|
+
use_audio_transcription (bool, optional): Whether to enable audio
|
|
91
|
+
transcription using OpenAI's audio models. Requires a valid OpenAI
|
|
92
|
+
API key. When disabled, video analysis will be based solely on
|
|
93
|
+
visual content. (default: :obj:`False`)
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
@dependencies_required("ffmpeg", "scenedetect")
|
|
97
|
+
def __init__(
|
|
98
|
+
self,
|
|
99
|
+
download_directory: Optional[str] = None,
|
|
100
|
+
model: Optional[BaseModelBackend] = None,
|
|
101
|
+
use_audio_transcription: bool = False,
|
|
102
|
+
) -> None:
|
|
103
|
+
self._cleanup = download_directory is None
|
|
104
|
+
self._temp_files: list[str] = [] # Track temporary files for cleanup
|
|
105
|
+
self._use_audio_transcription = use_audio_transcription
|
|
106
|
+
|
|
107
|
+
self._download_directory = Path(
|
|
108
|
+
download_directory or tempfile.mkdtemp()
|
|
109
|
+
).resolve()
|
|
110
|
+
|
|
111
|
+
self.video_downloader_toolkit = VideoDownloaderToolkit(
|
|
112
|
+
download_directory=str(self._download_directory)
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
try:
|
|
116
|
+
self._download_directory.mkdir(parents=True, exist_ok=True)
|
|
117
|
+
except FileExistsError:
|
|
118
|
+
raise ValueError(
|
|
119
|
+
f"{self._download_directory} is not a valid directory."
|
|
120
|
+
)
|
|
121
|
+
except OSError as e:
|
|
122
|
+
raise ValueError(
|
|
123
|
+
f"Error creating directory {self._download_directory}: {e}"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
logger.info(f"Video will be downloaded to {self._download_directory}")
|
|
127
|
+
|
|
128
|
+
self.vl_model = model
|
|
129
|
+
# Ensure ChatAgent is initialized with a model if provided
|
|
130
|
+
if self.vl_model:
|
|
131
|
+
# Import ChatAgent at runtime to avoid circular imports
|
|
132
|
+
from camel.agents import ChatAgent
|
|
133
|
+
|
|
134
|
+
self.vl_agent = ChatAgent(model=self.vl_model)
|
|
135
|
+
else:
|
|
136
|
+
# If no model is provided, use default model in ChatAgent
|
|
137
|
+
# Import ChatAgent at runtime to avoid circular imports
|
|
138
|
+
from camel.agents import ChatAgent
|
|
139
|
+
|
|
140
|
+
self.vl_agent = ChatAgent()
|
|
141
|
+
logger.warning(
|
|
142
|
+
"No vision-language model provided. Using default model in"
|
|
143
|
+
" ChatAgent."
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
# Initialize audio models only if audio transcription is enabled
|
|
147
|
+
self.audio_models = None
|
|
148
|
+
if self._use_audio_transcription:
|
|
149
|
+
try:
|
|
150
|
+
self.audio_models = OpenAIAudioModels()
|
|
151
|
+
except Exception as e:
|
|
152
|
+
logger.warning(
|
|
153
|
+
f"Failed to initialize OpenAIAudioModels: {e}. "
|
|
154
|
+
"Audio transcription will be disabled."
|
|
155
|
+
)
|
|
156
|
+
self._use_audio_transcription = False
|
|
157
|
+
|
|
158
|
+
def __del__(self):
|
|
159
|
+
r"""Clean up temporary directories and files when the object is
|
|
160
|
+
destroyed.
|
|
161
|
+
"""
|
|
162
|
+
# Clean up temporary files
|
|
163
|
+
for temp_file in self._temp_files:
|
|
164
|
+
if os.path.exists(temp_file):
|
|
165
|
+
try:
|
|
166
|
+
os.remove(temp_file)
|
|
167
|
+
logger.debug(f"Removed temporary file: {temp_file}")
|
|
168
|
+
except OSError as e:
|
|
169
|
+
logger.warning(
|
|
170
|
+
f"Failed to remove temporary file {temp_file}: {e}"
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
# Clean up temporary directory if needed
|
|
174
|
+
if self._cleanup and os.path.exists(self._download_directory):
|
|
175
|
+
try:
|
|
176
|
+
import shutil
|
|
177
|
+
|
|
178
|
+
shutil.rmtree(self._download_directory)
|
|
179
|
+
logger.debug(
|
|
180
|
+
f"Removed temporary directory: {self._download_directory}"
|
|
181
|
+
)
|
|
182
|
+
except OSError as e:
|
|
183
|
+
logger.warning(
|
|
184
|
+
f"Failed to remove temporary directory"
|
|
185
|
+
f" {self._download_directory}: {e}"
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
def _extract_audio_from_video(
|
|
189
|
+
self, video_path: str, output_format: str = "mp3"
|
|
190
|
+
) -> str:
|
|
191
|
+
r"""Extract audio from the video.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
video_path (str): The path to the video file.
|
|
195
|
+
output_format (str): The format of the audio file to be saved.
|
|
196
|
+
(default: :obj:`"mp3"`)
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
str: The path to the audio file.
|
|
200
|
+
"""
|
|
201
|
+
import ffmpeg
|
|
202
|
+
|
|
203
|
+
# Handle case where video file doesn't have an extension
|
|
204
|
+
base_path = os.path.splitext(video_path)[0]
|
|
205
|
+
output_path = f"{base_path}.{output_format}"
|
|
206
|
+
|
|
207
|
+
try:
|
|
208
|
+
(
|
|
209
|
+
ffmpeg.input(video_path)
|
|
210
|
+
.output(output_path, vn=None, acodec="libmp3lame")
|
|
211
|
+
.run(quiet=True)
|
|
212
|
+
)
|
|
213
|
+
# Track the audio file for cleanup
|
|
214
|
+
self._temp_files.append(output_path)
|
|
215
|
+
return output_path
|
|
216
|
+
except ffmpeg.Error as e:
|
|
217
|
+
error_message = f"FFmpeg-Python failed: {e}"
|
|
218
|
+
logger.error(error_message)
|
|
219
|
+
raise RuntimeError(error_message)
|
|
220
|
+
|
|
221
|
+
def _transcribe_audio(self, audio_path: str) -> str:
|
|
222
|
+
r"""Transcribe the audio of the video."""
|
|
223
|
+
# Check if audio transcription is enabled and audio models are
|
|
224
|
+
# available
|
|
225
|
+
if not self._use_audio_transcription or self.audio_models is None:
|
|
226
|
+
logger.warning("Audio transcription is disabled or not available")
|
|
227
|
+
return "No audio transcription available."
|
|
228
|
+
|
|
229
|
+
try:
|
|
230
|
+
audio_transcript = self.audio_models.speech_to_text(audio_path)
|
|
231
|
+
if not audio_transcript:
|
|
232
|
+
logger.warning("Audio transcription returned empty result")
|
|
233
|
+
return "No audio transcription available."
|
|
234
|
+
return audio_transcript
|
|
235
|
+
except Exception as e:
|
|
236
|
+
logger.error(f"Audio transcription failed: {e}")
|
|
237
|
+
return "Audio transcription failed."
|
|
238
|
+
|
|
239
|
+
def _extract_keyframes(
|
|
240
|
+
self, video_path: str, num_frames: int, threshold: float = 25.0
|
|
241
|
+
) -> List[Image.Image]:
|
|
242
|
+
r"""Extract keyframes from a video based on scene changes
|
|
243
|
+
and return them as PIL.Image.Image objects.
|
|
244
|
+
|
|
245
|
+
Args:
|
|
246
|
+
video_path (str): Path to the video file.
|
|
247
|
+
num_frames (int): Number of keyframes to extract.
|
|
248
|
+
threshold (float): The threshold value for scene change detection.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
list: A list of PIL.Image.Image objects representing
|
|
252
|
+
the extracted keyframes.
|
|
253
|
+
"""
|
|
254
|
+
from scenedetect import ( # type: ignore[import-untyped]
|
|
255
|
+
SceneManager,
|
|
256
|
+
VideoManager,
|
|
257
|
+
)
|
|
258
|
+
from scenedetect.detectors import ( # type: ignore[import-untyped]
|
|
259
|
+
ContentDetector,
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
if num_frames <= 0:
|
|
263
|
+
logger.warning(
|
|
264
|
+
f"Invalid num_frames: {num_frames}, using default of 1"
|
|
265
|
+
)
|
|
266
|
+
num_frames = 1
|
|
267
|
+
|
|
268
|
+
video_manager = VideoManager([video_path])
|
|
269
|
+
scene_manager = SceneManager()
|
|
270
|
+
scene_manager.add_detector(ContentDetector(threshold=threshold))
|
|
271
|
+
|
|
272
|
+
video_manager.set_duration()
|
|
273
|
+
video_manager.start()
|
|
274
|
+
scene_manager.detect_scenes(video_manager)
|
|
275
|
+
|
|
276
|
+
scenes = scene_manager.get_scene_list()
|
|
277
|
+
keyframes: List[Image.Image] = []
|
|
278
|
+
|
|
279
|
+
# Handle case where no scenes are detected
|
|
280
|
+
if not scenes:
|
|
281
|
+
logger.warning(
|
|
282
|
+
"No scenes detected in video, capturing frames at "
|
|
283
|
+
"regular intervals"
|
|
284
|
+
)
|
|
285
|
+
import cv2
|
|
286
|
+
|
|
287
|
+
cap = cv2.VideoCapture(video_path)
|
|
288
|
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
289
|
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
|
290
|
+
duration = total_frames / fps if fps > 0 else 0
|
|
291
|
+
|
|
292
|
+
if duration > 0 and total_frames > 0:
|
|
293
|
+
# Extract frames at regular intervals
|
|
294
|
+
interval = duration / min(num_frames, total_frames)
|
|
295
|
+
for i in range(min(num_frames, total_frames)):
|
|
296
|
+
time_sec = i * interval
|
|
297
|
+
frame = _capture_screenshot(video_path, time_sec)
|
|
298
|
+
keyframes.append(frame)
|
|
299
|
+
|
|
300
|
+
cap.release()
|
|
301
|
+
else:
|
|
302
|
+
# Extract frames from detected scenes
|
|
303
|
+
for start_time, _ in scenes:
|
|
304
|
+
if len(keyframes) >= num_frames:
|
|
305
|
+
break
|
|
306
|
+
frame = _capture_screenshot(video_path, start_time)
|
|
307
|
+
keyframes.append(frame)
|
|
308
|
+
|
|
309
|
+
if not keyframes:
|
|
310
|
+
logger.error("Failed to extract any keyframes from video")
|
|
311
|
+
raise ValueError("Failed to extract keyframes from video")
|
|
312
|
+
|
|
313
|
+
logger.info(f"Extracted {len(keyframes)} keyframes")
|
|
314
|
+
return keyframes
|
|
315
|
+
|
|
316
|
+
def ask_question_about_video(
|
|
317
|
+
self,
|
|
318
|
+
video_path: str,
|
|
319
|
+
question: str,
|
|
320
|
+
num_frames: int = 28,
|
|
321
|
+
) -> str:
|
|
322
|
+
r"""Ask a question about the video.
|
|
323
|
+
|
|
324
|
+
Args:
|
|
325
|
+
video_path (str): The path to the video file.
|
|
326
|
+
It can be a local file or a URL (such as Youtube website).
|
|
327
|
+
question (str): The question to ask about the video.
|
|
328
|
+
num_frames (int): The number of frames to extract from the video.
|
|
329
|
+
To be adjusted based on the length of the video.
|
|
330
|
+
(default: :obj:`28`)
|
|
331
|
+
|
|
332
|
+
Returns:
|
|
333
|
+
str: The answer to the question.
|
|
334
|
+
"""
|
|
335
|
+
from urllib.parse import urlparse
|
|
336
|
+
|
|
337
|
+
if not question:
|
|
338
|
+
raise ValueError("Question cannot be empty")
|
|
339
|
+
|
|
340
|
+
if num_frames <= 0:
|
|
341
|
+
logger.warning(
|
|
342
|
+
f"Invalid num_frames: {num_frames}, using default of 28"
|
|
343
|
+
)
|
|
344
|
+
num_frames = 28
|
|
345
|
+
|
|
346
|
+
parsed_url = urlparse(video_path)
|
|
347
|
+
is_url = all([parsed_url.scheme, parsed_url.netloc])
|
|
348
|
+
|
|
349
|
+
downloaded_video_path = None
|
|
350
|
+
try:
|
|
351
|
+
if is_url:
|
|
352
|
+
downloaded_video_path = (
|
|
353
|
+
self.video_downloader_toolkit.download_video(video_path)
|
|
354
|
+
)
|
|
355
|
+
if not downloaded_video_path or not os.path.exists(
|
|
356
|
+
downloaded_video_path
|
|
357
|
+
):
|
|
358
|
+
raise ValueError(
|
|
359
|
+
f"Failed to download video from {video_path}"
|
|
360
|
+
)
|
|
361
|
+
video_path = downloaded_video_path
|
|
362
|
+
|
|
363
|
+
if not os.path.exists(video_path):
|
|
364
|
+
raise FileNotFoundError(f"Video file not found: {video_path}")
|
|
365
|
+
|
|
366
|
+
audio_transcript = "No audio transcription available."
|
|
367
|
+
if self._use_audio_transcription:
|
|
368
|
+
audio_path = self._extract_audio_from_video(video_path)
|
|
369
|
+
audio_transcript = self._transcribe_audio(audio_path)
|
|
370
|
+
|
|
371
|
+
video_frames = self._extract_keyframes(video_path, num_frames)
|
|
372
|
+
prompt = VIDEO_QA_PROMPT.format(
|
|
373
|
+
audio_transcription=audio_transcript,
|
|
374
|
+
question=question,
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
msg = BaseMessage.make_user_message(
|
|
378
|
+
role_name="User",
|
|
379
|
+
content=prompt,
|
|
380
|
+
image_list=video_frames,
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
response = self.vl_agent.step(msg)
|
|
384
|
+
if not response or not response.msgs:
|
|
385
|
+
logger.error("Model returned empty response")
|
|
386
|
+
return (
|
|
387
|
+
"Failed to generate an answer. "
|
|
388
|
+
"The model returned an empty response."
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
answer = response.msgs[0].content
|
|
392
|
+
return answer
|
|
393
|
+
|
|
394
|
+
except Exception as e:
|
|
395
|
+
error_message = f"Error processing video: {e!s}"
|
|
396
|
+
logger.error(error_message)
|
|
397
|
+
return f"Error: {error_message}"
|
|
398
|
+
|
|
399
|
+
def get_tools(self) -> List[FunctionTool]:
|
|
400
|
+
r"""Returns a list of FunctionTool objects representing the
|
|
401
|
+
functions in the toolkit.
|
|
402
|
+
|
|
403
|
+
Returns:
|
|
404
|
+
List[FunctionTool]: A list of FunctionTool objects representing
|
|
405
|
+
the functions in the toolkit.
|
|
406
|
+
"""
|
|
407
|
+
return [FunctionTool(self.ask_question_about_video)]
|
camel/toolkits/{video_toolkit.py → video_download_toolkit.py}
RENAMED
|
@@ -13,32 +13,19 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
|
|
15
15
|
import io
|
|
16
|
-
import logging
|
|
17
|
-
import re
|
|
18
16
|
import tempfile
|
|
19
17
|
from pathlib import Path
|
|
20
18
|
from typing import List, Optional
|
|
19
|
+
from urllib.parse import urlparse
|
|
21
20
|
|
|
22
21
|
from PIL import Image
|
|
23
22
|
|
|
23
|
+
from camel.logger import get_logger
|
|
24
24
|
from camel.toolkits.base import BaseToolkit
|
|
25
25
|
from camel.toolkits.function_tool import FunctionTool
|
|
26
26
|
from camel.utils import dependencies_required
|
|
27
27
|
|
|
28
|
-
logger =
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def _standardize_url(url: str) -> str:
|
|
32
|
-
r"""Standardize the given URL."""
|
|
33
|
-
# Special case for YouTube embed URLs
|
|
34
|
-
if "youtube.com/embed/" in url:
|
|
35
|
-
match = re.search(r"embed/([a-zA-Z0-9_-]+)", url)
|
|
36
|
-
if match:
|
|
37
|
-
return f"https://www.youtube.com/watch?v={match.group(1)}"
|
|
38
|
-
else:
|
|
39
|
-
raise ValueError(f"Invalid YouTube URL: {url}")
|
|
40
|
-
|
|
41
|
-
return url
|
|
28
|
+
logger = get_logger(__name__)
|
|
42
29
|
|
|
43
30
|
|
|
44
31
|
def _capture_screenshot(video_file: str, timestamp: float) -> Image.Image:
|
|
@@ -85,7 +72,9 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
85
72
|
self,
|
|
86
73
|
download_directory: Optional[str] = None,
|
|
87
74
|
cookies_path: Optional[str] = None,
|
|
75
|
+
timeout: Optional[float] = None,
|
|
88
76
|
) -> None:
|
|
77
|
+
super().__init__(timeout=timeout)
|
|
89
78
|
self._cleanup = download_directory is None
|
|
90
79
|
self._cookies_path = cookies_path
|
|
91
80
|
|
|
@@ -117,7 +106,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
117
106
|
if self._cleanup:
|
|
118
107
|
shutil.rmtree(self._download_directory, ignore_errors=True)
|
|
119
108
|
|
|
120
|
-
def
|
|
109
|
+
def download_video(self, url: str) -> str:
|
|
121
110
|
r"""Download the video and optionally split it into chunks.
|
|
122
111
|
|
|
123
112
|
yt-dlp will detect if the video is downloaded automatically so there
|
|
@@ -147,18 +136,21 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
147
136
|
|
|
148
137
|
def get_video_bytes(
|
|
149
138
|
self,
|
|
150
|
-
|
|
139
|
+
video_path: str,
|
|
151
140
|
) -> bytes:
|
|
152
|
-
r"""Download video by the
|
|
141
|
+
r"""Download video by the path, and return the content in bytes.
|
|
153
142
|
|
|
154
143
|
Args:
|
|
155
|
-
|
|
144
|
+
video_path (str): The path to the video file.
|
|
156
145
|
|
|
157
146
|
Returns:
|
|
158
147
|
bytes: The video file content in bytes.
|
|
159
148
|
"""
|
|
160
|
-
|
|
161
|
-
|
|
149
|
+
parsed_url = urlparse(video_path)
|
|
150
|
+
is_url = all([parsed_url.scheme, parsed_url.netloc])
|
|
151
|
+
if is_url:
|
|
152
|
+
video_path = self.download_video(video_path)
|
|
153
|
+
video_file = video_path
|
|
162
154
|
|
|
163
155
|
with open(video_file, 'rb') as f:
|
|
164
156
|
video_bytes = f.read()
|
|
@@ -166,7 +158,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
166
158
|
return video_bytes
|
|
167
159
|
|
|
168
160
|
def get_video_screenshots(
|
|
169
|
-
self,
|
|
161
|
+
self, video_path: str, amount: int
|
|
170
162
|
) -> List[Image.Image]:
|
|
171
163
|
r"""Capture screenshots from the video at specified timestamps or by
|
|
172
164
|
dividing the video into equal parts if an integer is provided.
|
|
@@ -180,8 +172,11 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
180
172
|
"""
|
|
181
173
|
import ffmpeg
|
|
182
174
|
|
|
183
|
-
|
|
184
|
-
|
|
175
|
+
parsed_url = urlparse(video_path)
|
|
176
|
+
is_url = all([parsed_url.scheme, parsed_url.netloc])
|
|
177
|
+
if is_url:
|
|
178
|
+
video_path = self.download_video(video_path)
|
|
179
|
+
video_file = video_path
|
|
185
180
|
|
|
186
181
|
# Get the video length
|
|
187
182
|
try:
|
|
@@ -206,6 +201,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
206
201
|
the functions in the toolkit.
|
|
207
202
|
"""
|
|
208
203
|
return [
|
|
204
|
+
FunctionTool(self.download_video),
|
|
209
205
|
FunctionTool(self.get_video_bytes),
|
|
210
206
|
FunctionTool(self.get_video_screenshots),
|
|
211
207
|
]
|