camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_types.py +6 -2
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +4014 -410
- camel/agents/mcp_agent.py +30 -27
- camel/agents/repo_agent.py +2 -1
- camel/benchmarks/browsecomp.py +6 -6
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/configs/vllm_config.py +2 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/base_generator.py +39 -10
- camel/environments/__init__.py +12 -0
- camel/environments/rlcards_env.py +860 -0
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +4 -16
- camel/interpreters/docker_interpreter.py +3 -2
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/internal_python_interpreter.py +51 -2
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/base_loader.py +85 -0
- camel/loaders/chunkr_reader.py +9 -0
- camel/loaders/firecrawl_reader.py +4 -4
- camel/logger.py +1 -1
- camel/memories/agent_memories.py +84 -1
- camel/memories/base.py +34 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/blocks/vectordb_block.py +8 -1
- camel/memories/context_creators/score_based.py +29 -237
- camel/memories/records.py +88 -8
- camel/messages/base.py +166 -40
- camel/messages/func_message.py +32 -5
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +117 -18
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +205 -91
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +189 -24
- camel/models/cohere_model.py +5 -17
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +6 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +71 -20
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +49 -32
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/model_manager.py +24 -6
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +185 -19
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +171 -46
- camel/models/openai_model.py +205 -77
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/configs.py +11 -11
- camel/runtimes/daytona_runtime.py +15 -16
- camel/runtimes/docker_runtime.py +6 -6
- camel/runtimes/remote_http_runtime.py +5 -5
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/__init__.py +2 -0
- camel/societies/role_playing.py +26 -28
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +249 -38
- camel/societies/workforce/role_playing_worker.py +82 -20
- camel/societies/workforce/single_agent_worker.py +634 -34
- camel/societies/workforce/structured_output_handler.py +512 -0
- camel/societies/workforce/task_channel.py +169 -23
- camel/societies/workforce/utils.py +176 -9
- camel/societies/workforce/worker.py +77 -23
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +3168 -478
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +203 -175
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/__init__.py +4 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/__init__.py +6 -0
- camel/storages/vectordb_storages/chroma.py +731 -0
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/surreal.py +365 -0
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +244 -27
- camel/toolkits/__init__.py +46 -8
- camel/toolkits/aci_toolkit.py +64 -19
- camel/toolkits/arxiv_toolkit.py +6 -6
- camel/toolkits/base.py +63 -5
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/craw4ai_toolkit.py +93 -0
- camel/toolkits/dappier_toolkit.py +10 -6
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
- camel/toolkits/excel_toolkit.py +901 -67
- camel/toolkits/file_toolkit.py +1402 -0
- camel/toolkits/function_tool.py +30 -6
- camel/toolkits/github_toolkit.py +107 -20
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +54 -0
- camel/toolkits/human_toolkit.py +34 -10
- camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
- camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
- camel/toolkits/image_generation_toolkit.py +390 -0
- camel/toolkits/jina_reranker_toolkit.py +3 -4
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +104 -0
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +370 -45
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_agent_toolkit.py +608 -0
- camel/toolkits/message_integration.py +724 -0
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +277 -0
- camel/toolkits/notion_mcp_toolkit.py +224 -0
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +56 -0
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/pptx_toolkit.py +25 -12
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/screenshot_toolkit.py +213 -0
- camel/toolkits/search_toolkit.py +437 -142
- camel/toolkits/slack_toolkit.py +104 -50
- camel/toolkits/sympy_toolkit.py +1 -1
- camel/toolkits/task_planning_toolkit.py +3 -3
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/thinking_toolkit.py +1 -1
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +106 -26
- camel/toolkits/video_download_toolkit.py +17 -14
- camel/toolkits/web_deploy_toolkit.py +1219 -0
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/agents/tool_calling_record.py +4 -1
- camel/types/enums.py +316 -40
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +31 -4
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/mcp_client.py +45 -1
- camel/utils/message_summarizer.py +148 -0
- camel/utils/token_counting.py +43 -20
- camel/utils/tool_result.py +44 -0
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/dalle_toolkit.py +0 -175
- camel/toolkits/file_write_toolkit.py +0 -444
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1037
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -17,6 +17,7 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import io
|
|
19
19
|
import os
|
|
20
|
+
import re
|
|
20
21
|
import tempfile
|
|
21
22
|
from pathlib import Path
|
|
22
23
|
from typing import List, Optional
|
|
@@ -41,6 +42,11 @@ VIDEO_QA_PROMPT = """
|
|
|
41
42
|
Analyze the provided video frames and corresponding audio transcription to \
|
|
42
43
|
answer the given question(s) thoroughly and accurately.
|
|
43
44
|
|
|
45
|
+
The transcriptions may come from two sources:
|
|
46
|
+
1. **Audio Transcription**: The spoken words in the video.
|
|
47
|
+
2. **Visual Text (OCR)**: Text extracted from the video frames (like \
|
|
48
|
+
captions, on-screen text, etc.).
|
|
49
|
+
|
|
44
50
|
Instructions:
|
|
45
51
|
1. Visual Analysis:
|
|
46
52
|
- Examine the video frames to identify visible entities.
|
|
@@ -49,11 +55,13 @@ such as size, color, shape, texture, or behavior.
|
|
|
49
55
|
- Note significant groupings, interactions, or contextual patterns \
|
|
50
56
|
relevant to the analysis.
|
|
51
57
|
|
|
52
|
-
2. Audio Integration:
|
|
58
|
+
2. Audio and Text Integration:
|
|
53
59
|
- Use the audio transcription to complement or clarify your visual \
|
|
54
60
|
observations.
|
|
61
|
+
- Use the visual text (OCR) to get exact textual information that may \
|
|
62
|
+
not be accurately readable from the images alone.
|
|
55
63
|
- Identify names, descriptions, or contextual hints in the \
|
|
56
|
-
|
|
64
|
+
transcriptions that help confirm or refine your visual analysis.
|
|
57
65
|
|
|
58
66
|
3. Detailed Reasoning and Justification:
|
|
59
67
|
- Provide a brief explanation of how you identified and distinguished \
|
|
@@ -65,7 +73,7 @@ your reasoning.
|
|
|
65
73
|
- Specify the total number of distinct species or object types \
|
|
66
74
|
identified in the video.
|
|
67
75
|
- Describe the defining characteristics and any supporting evidence \
|
|
68
|
-
from the video and transcription.
|
|
76
|
+
from the video and transcription sources.
|
|
69
77
|
|
|
70
78
|
5. Important Considerations:
|
|
71
79
|
- Pay close attention to subtle differences that could distinguish \
|
|
@@ -76,6 +84,9 @@ similar-looking species or objects
|
|
|
76
84
|
**Audio Transcription:**
|
|
77
85
|
{audio_transcription}
|
|
78
86
|
|
|
87
|
+
**Visual Text (OCR):**
|
|
88
|
+
{visual_text}
|
|
89
|
+
|
|
79
90
|
**Question:**
|
|
80
91
|
{question}
|
|
81
92
|
"""
|
|
@@ -86,7 +97,7 @@ class VideoAnalysisToolkit(BaseToolkit):
|
|
|
86
97
|
r"""A class for analysing videos with vision-language model.
|
|
87
98
|
|
|
88
99
|
Args:
|
|
89
|
-
|
|
100
|
+
working_directory (Optional[str], optional): The directory where the
|
|
90
101
|
video will be downloaded to. If not provided, video will be stored
|
|
91
102
|
in a temporary directory and will be cleaned up after use.
|
|
92
103
|
(default: :obj:`None`)
|
|
@@ -96,6 +107,8 @@ class VideoAnalysisToolkit(BaseToolkit):
|
|
|
96
107
|
transcription using OpenAI's audio models. Requires a valid OpenAI
|
|
97
108
|
API key. When disabled, video analysis will be based solely on
|
|
98
109
|
visual content. (default: :obj:`False`)
|
|
110
|
+
use_ocr (bool, optional): Whether to enable OCR for extracting text
|
|
111
|
+
from video frames. (default: :obj:`False`)
|
|
99
112
|
frame_interval (float, optional): Interval in seconds between frames
|
|
100
113
|
to extract from the video. (default: :obj:`4.0`)
|
|
101
114
|
output_language (str, optional): The language for output responses.
|
|
@@ -110,38 +123,40 @@ class VideoAnalysisToolkit(BaseToolkit):
|
|
|
110
123
|
@dependencies_required("ffmpeg", "scenedetect")
|
|
111
124
|
def __init__(
|
|
112
125
|
self,
|
|
113
|
-
|
|
126
|
+
working_directory: Optional[str] = None,
|
|
114
127
|
model: Optional[BaseModelBackend] = None,
|
|
115
128
|
use_audio_transcription: bool = False,
|
|
129
|
+
use_ocr: bool = False,
|
|
116
130
|
frame_interval: float = 4.0,
|
|
117
131
|
output_language: str = "English",
|
|
118
132
|
cookies_path: Optional[str] = None,
|
|
119
133
|
timeout: Optional[float] = None,
|
|
120
134
|
) -> None:
|
|
121
135
|
super().__init__(timeout=timeout)
|
|
122
|
-
self._cleanup =
|
|
136
|
+
self._cleanup = working_directory is None
|
|
123
137
|
self._temp_files: list[str] = [] # Track temporary files for cleanup
|
|
124
138
|
self._use_audio_transcription = use_audio_transcription
|
|
139
|
+
self._use_ocr = use_ocr
|
|
125
140
|
self.output_language = output_language
|
|
126
141
|
self.frame_interval = frame_interval
|
|
127
142
|
|
|
128
|
-
self.
|
|
129
|
-
|
|
143
|
+
self._working_directory = Path(
|
|
144
|
+
working_directory or tempfile.mkdtemp()
|
|
130
145
|
).resolve()
|
|
131
146
|
|
|
132
147
|
self.video_downloader_toolkit = VideoDownloaderToolkit(
|
|
133
|
-
|
|
148
|
+
working_directory=str(self._working_directory),
|
|
134
149
|
cookies_path=cookies_path,
|
|
135
150
|
)
|
|
136
151
|
|
|
137
152
|
try:
|
|
138
|
-
self.
|
|
153
|
+
self._working_directory.mkdir(parents=True, exist_ok=True)
|
|
139
154
|
except OSError as e:
|
|
140
155
|
raise ValueError(
|
|
141
|
-
f"Error creating directory {self.
|
|
156
|
+
f"Error creating directory {self._working_directory}: {e}"
|
|
142
157
|
)
|
|
143
158
|
|
|
144
|
-
logger.info(f"Video will be downloaded to {self.
|
|
159
|
+
logger.info(f"Video will be downloaded to {self._working_directory}")
|
|
145
160
|
|
|
146
161
|
self.vl_model = model
|
|
147
162
|
# Ensure ChatAgent is initialized with a model if provided
|
|
@@ -180,27 +195,33 @@ class VideoAnalysisToolkit(BaseToolkit):
|
|
|
180
195
|
destroyed.
|
|
181
196
|
"""
|
|
182
197
|
# Clean up temporary files
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
198
|
+
if hasattr(self, '_temp_files'):
|
|
199
|
+
for temp_file in self._temp_files:
|
|
200
|
+
if os.path.exists(temp_file):
|
|
201
|
+
try:
|
|
202
|
+
os.remove(temp_file)
|
|
203
|
+
logger.debug(f"Removed temporary file: {temp_file}")
|
|
204
|
+
except OSError as e:
|
|
205
|
+
logger.warning(
|
|
206
|
+
f"Failed to remove temporary file {temp_file}: {e}"
|
|
207
|
+
)
|
|
192
208
|
|
|
193
209
|
# Clean up temporary directory if needed
|
|
194
|
-
if
|
|
210
|
+
if (
|
|
211
|
+
hasattr(self, '_cleanup')
|
|
212
|
+
and self._cleanup
|
|
213
|
+
and hasattr(self, '_working_directory')
|
|
214
|
+
and os.path.exists(self._working_directory)
|
|
215
|
+
):
|
|
195
216
|
try:
|
|
196
217
|
import sys
|
|
197
218
|
|
|
198
219
|
if getattr(sys, 'modules', None) is not None:
|
|
199
220
|
import shutil
|
|
200
221
|
|
|
201
|
-
shutil.rmtree(self.
|
|
222
|
+
shutil.rmtree(self._working_directory)
|
|
202
223
|
logger.debug(
|
|
203
|
-
f"Removed temp directory: {self.
|
|
224
|
+
f"Removed temp directory: {self._working_directory}"
|
|
204
225
|
)
|
|
205
226
|
except (ImportError, AttributeError):
|
|
206
227
|
# Skip cleanup if interpreter is shutting down
|
|
@@ -208,9 +229,56 @@ class VideoAnalysisToolkit(BaseToolkit):
|
|
|
208
229
|
except OSError as e:
|
|
209
230
|
logger.warning(
|
|
210
231
|
f"Failed to remove temporary directory "
|
|
211
|
-
f"{self.
|
|
232
|
+
f"{self._working_directory}: {e}"
|
|
212
233
|
)
|
|
213
234
|
|
|
235
|
+
@dependencies_required("pytesseract", "cv2", "numpy")
|
|
236
|
+
def _extract_text_from_frame(self, frame: Image.Image) -> str:
|
|
237
|
+
r"""Extract text from a video frame using OCR.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
frame (Image.Image): PIL image frame to process.
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
str: Extracted text from the frame.
|
|
244
|
+
"""
|
|
245
|
+
import cv2
|
|
246
|
+
import numpy as np
|
|
247
|
+
import pytesseract
|
|
248
|
+
|
|
249
|
+
try:
|
|
250
|
+
# Convert to OpenCV format for preprocessing
|
|
251
|
+
cv_image = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR)
|
|
252
|
+
|
|
253
|
+
# Preprocessing for better OCR results
|
|
254
|
+
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
|
|
255
|
+
blur = cv2.GaussianBlur(gray, (3, 3), 0)
|
|
256
|
+
_, threshold = cv2.threshold(
|
|
257
|
+
blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
# Convert back to PIL image for OCR
|
|
261
|
+
preprocessed_frame = Image.fromarray(threshold)
|
|
262
|
+
return pytesseract.image_to_string(preprocessed_frame).strip()
|
|
263
|
+
except Exception as e:
|
|
264
|
+
logger.error(f"OCR failed: {e}")
|
|
265
|
+
return ""
|
|
266
|
+
|
|
267
|
+
def _process_extracted_text(self, text: str) -> str:
|
|
268
|
+
r"""Clean and format OCR-extracted text.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
text (str): Raw extracted OCR text.
|
|
272
|
+
|
|
273
|
+
Returns:
|
|
274
|
+
str: Cleaned and formatted text.
|
|
275
|
+
"""
|
|
276
|
+
# Filter irrelevant characters and noise
|
|
277
|
+
text = re.sub(r'[^\w\s,.?!:;\'"()-]', '', text)
|
|
278
|
+
# Remove excessive whitespace
|
|
279
|
+
text = re.sub(r'\s+', ' ', text).strip()
|
|
280
|
+
return text
|
|
281
|
+
|
|
214
282
|
def _extract_audio_from_video(
|
|
215
283
|
self, video_path: str, output_format: str = "mp3"
|
|
216
284
|
) -> str:
|
|
@@ -511,16 +579,28 @@ class VideoAnalysisToolkit(BaseToolkit):
|
|
|
511
579
|
audio_path = self._extract_audio_from_video(video_path)
|
|
512
580
|
audio_transcript = self._transcribe_audio(audio_path)
|
|
513
581
|
|
|
582
|
+
# Extract visual text with OCR
|
|
583
|
+
visual_text = ""
|
|
514
584
|
video_frames = self._extract_keyframes(video_path)
|
|
585
|
+
# Build visual text only if OCR is enabled
|
|
586
|
+
if self._use_ocr:
|
|
587
|
+
for frame in video_frames:
|
|
588
|
+
text = self._extract_text_from_frame(frame)
|
|
589
|
+
processed = self._process_extracted_text(text)
|
|
590
|
+
if processed:
|
|
591
|
+
visual_text += processed + "\n"
|
|
592
|
+
visual_text = visual_text.strip() or "No visual text detected."
|
|
593
|
+
|
|
515
594
|
prompt = VIDEO_QA_PROMPT.format(
|
|
516
595
|
audio_transcription=audio_transcript,
|
|
596
|
+
visual_text=visual_text,
|
|
517
597
|
question=question,
|
|
518
598
|
)
|
|
519
599
|
|
|
520
600
|
msg = BaseMessage.make_user_message(
|
|
521
601
|
role_name="User",
|
|
522
602
|
content=prompt,
|
|
523
|
-
image_list=video_frames,
|
|
603
|
+
image_list=video_frames, # type: ignore[arg-type]
|
|
524
604
|
)
|
|
525
605
|
# Reset the agent to clear previous state
|
|
526
606
|
self.vl_agent.reset()
|
|
@@ -26,7 +26,7 @@ from PIL import Image
|
|
|
26
26
|
from camel.logger import get_logger
|
|
27
27
|
from camel.toolkits.base import BaseToolkit
|
|
28
28
|
from camel.toolkits.function_tool import FunctionTool
|
|
29
|
-
from camel.utils import
|
|
29
|
+
from camel.utils import dependencies_required
|
|
30
30
|
|
|
31
31
|
logger = get_logger(__name__)
|
|
32
32
|
|
|
@@ -57,13 +57,12 @@ def _capture_screenshot(video_file: str, timestamp: float) -> Image.Image:
|
|
|
57
57
|
return Image.open(io.BytesIO(out))
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
@MCPServer()
|
|
61
60
|
class VideoDownloaderToolkit(BaseToolkit):
|
|
62
61
|
r"""A class for downloading videos and optionally splitting them into
|
|
63
62
|
chunks.
|
|
64
63
|
|
|
65
64
|
Args:
|
|
66
|
-
|
|
65
|
+
working_directory (Optional[str], optional): The directory where the
|
|
67
66
|
video will be downloaded to. If not provided, video will be stored
|
|
68
67
|
in a temporary directory and will be cleaned up after use.
|
|
69
68
|
(default: :obj:`None`)
|
|
@@ -74,30 +73,30 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
74
73
|
@dependencies_required("yt_dlp", "ffmpeg")
|
|
75
74
|
def __init__(
|
|
76
75
|
self,
|
|
77
|
-
|
|
76
|
+
working_directory: Optional[str] = None,
|
|
78
77
|
cookies_path: Optional[str] = None,
|
|
79
78
|
timeout: Optional[float] = None,
|
|
80
79
|
) -> None:
|
|
81
80
|
super().__init__(timeout=timeout)
|
|
82
|
-
self._cleanup =
|
|
81
|
+
self._cleanup = working_directory is None
|
|
83
82
|
self._cookies_path = cookies_path
|
|
84
83
|
|
|
85
|
-
self.
|
|
86
|
-
|
|
84
|
+
self._working_directory = Path(
|
|
85
|
+
working_directory or tempfile.mkdtemp()
|
|
87
86
|
).resolve()
|
|
88
87
|
|
|
89
88
|
try:
|
|
90
|
-
self.
|
|
89
|
+
self._working_directory.mkdir(parents=True, exist_ok=True)
|
|
91
90
|
except FileExistsError:
|
|
92
91
|
raise ValueError(
|
|
93
|
-
f"{self.
|
|
92
|
+
f"{self._working_directory} is not a valid directory."
|
|
94
93
|
)
|
|
95
94
|
except OSError as e:
|
|
96
95
|
raise ValueError(
|
|
97
|
-
f"Error creating directory {self.
|
|
96
|
+
f"Error creating directory {self._working_directory}: {e}"
|
|
98
97
|
)
|
|
99
98
|
|
|
100
|
-
logger.info(f"Video will be downloaded to {self.
|
|
99
|
+
logger.info(f"Video will be downloaded to {self._working_directory}")
|
|
101
100
|
|
|
102
101
|
def __del__(self) -> None:
|
|
103
102
|
r"""Deconstructor for the VideoDownloaderToolkit class.
|
|
@@ -112,7 +111,7 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
112
111
|
if getattr(sys, 'modules', None) is not None:
|
|
113
112
|
import shutil
|
|
114
113
|
|
|
115
|
-
shutil.rmtree(self.
|
|
114
|
+
shutil.rmtree(self._working_directory, ignore_errors=True)
|
|
116
115
|
except (ImportError, AttributeError):
|
|
117
116
|
# Skip cleanup if interpreter is shutting down
|
|
118
117
|
pass
|
|
@@ -123,12 +122,15 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
123
122
|
yt-dlp will detect if the video is downloaded automatically so there
|
|
124
123
|
is no need to check if the video exists.
|
|
125
124
|
|
|
125
|
+
Args:
|
|
126
|
+
url (str): The URL of the video to download.
|
|
127
|
+
|
|
126
128
|
Returns:
|
|
127
129
|
str: The path to the downloaded video file.
|
|
128
130
|
"""
|
|
129
131
|
import yt_dlp
|
|
130
132
|
|
|
131
|
-
video_template = self.
|
|
133
|
+
video_template = self._working_directory / "%(title)s.%(ext)s"
|
|
132
134
|
ydl_opts = {
|
|
133
135
|
'format': 'bestvideo+bestaudio/best',
|
|
134
136
|
'outtmpl': str(video_template),
|
|
@@ -175,7 +177,8 @@ class VideoDownloaderToolkit(BaseToolkit):
|
|
|
175
177
|
dividing the video into equal parts if an integer is provided.
|
|
176
178
|
|
|
177
179
|
Args:
|
|
178
|
-
|
|
180
|
+
video_path (str): The local path or URL of the video to take
|
|
181
|
+
screenshots.
|
|
179
182
|
amount (int): the amount of evenly split screenshots to capture.
|
|
180
183
|
|
|
181
184
|
Returns:
|